| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 2115, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00047281323877068556, |
| "grad_norm": 5.163570880889893, |
| "learning_rate": 5.0000000000000004e-08, |
| "loss": 1.4628, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0009456264775413711, |
| "grad_norm": 6.298020839691162, |
| "learning_rate": 1.0000000000000001e-07, |
| "loss": 1.5003, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0014184397163120568, |
| "grad_norm": 5.853623390197754, |
| "learning_rate": 1.5000000000000002e-07, |
| "loss": 1.4495, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0018912529550827422, |
| "grad_norm": 5.456025123596191, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 1.3798, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.002364066193853428, |
| "grad_norm": 5.757407188415527, |
| "learning_rate": 2.5000000000000004e-07, |
| "loss": 1.4515, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0028368794326241137, |
| "grad_norm": 5.872277736663818, |
| "learning_rate": 3.0000000000000004e-07, |
| "loss": 1.4424, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.003309692671394799, |
| "grad_norm": 6.7816009521484375, |
| "learning_rate": 3.5000000000000004e-07, |
| "loss": 1.4004, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0037825059101654845, |
| "grad_norm": 6.229667663574219, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 1.4494, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.00425531914893617, |
| "grad_norm": 5.336202621459961, |
| "learning_rate": 4.5000000000000003e-07, |
| "loss": 1.3916, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.004728132387706856, |
| "grad_norm": 5.589445114135742, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 1.2318, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.005200945626477541, |
| "grad_norm": 5.720539569854736, |
| "learning_rate": 5.5e-07, |
| "loss": 1.4367, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.005673758865248227, |
| "grad_norm": 5.913913726806641, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 1.342, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.006146572104018913, |
| "grad_norm": 5.899744987487793, |
| "learning_rate": 6.5e-07, |
| "loss": 1.4307, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.006619385342789598, |
| "grad_norm": 5.571037292480469, |
| "learning_rate": 7.000000000000001e-07, |
| "loss": 1.3372, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0070921985815602835, |
| "grad_norm": 5.480010509490967, |
| "learning_rate": 7.5e-07, |
| "loss": 1.3923, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.007565011820330969, |
| "grad_norm": 5.254702091217041, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 1.2928, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.008037825059101654, |
| "grad_norm": 6.090312480926514, |
| "learning_rate": 8.500000000000001e-07, |
| "loss": 1.4984, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.00851063829787234, |
| "grad_norm": 5.689319610595703, |
| "learning_rate": 9.000000000000001e-07, |
| "loss": 1.4108, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.008983451536643027, |
| "grad_norm": 5.386685848236084, |
| "learning_rate": 9.500000000000001e-07, |
| "loss": 1.425, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.009456264775413711, |
| "grad_norm": 6.451584815979004, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.5507, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.009929078014184398, |
| "grad_norm": 5.37647008895874, |
| "learning_rate": 1.0500000000000001e-06, |
| "loss": 1.4109, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.010401891252955082, |
| "grad_norm": 4.716553211212158, |
| "learning_rate": 1.1e-06, |
| "loss": 1.2028, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.010874704491725768, |
| "grad_norm": 4.950989723205566, |
| "learning_rate": 1.1500000000000002e-06, |
| "loss": 1.3043, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.011347517730496455, |
| "grad_norm": 4.688975811004639, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 1.2708, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.01182033096926714, |
| "grad_norm": 4.905868053436279, |
| "learning_rate": 1.25e-06, |
| "loss": 1.3268, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.012293144208037825, |
| "grad_norm": 4.503395080566406, |
| "learning_rate": 1.3e-06, |
| "loss": 1.1799, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.01276595744680851, |
| "grad_norm": 4.77382230758667, |
| "learning_rate": 1.3500000000000002e-06, |
| "loss": 1.3882, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.013238770685579196, |
| "grad_norm": 4.734329700469971, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 1.3476, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.013711583924349883, |
| "grad_norm": 4.775066375732422, |
| "learning_rate": 1.45e-06, |
| "loss": 1.2429, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.014184397163120567, |
| "grad_norm": 4.978334426879883, |
| "learning_rate": 1.5e-06, |
| "loss": 1.2119, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.014657210401891253, |
| "grad_norm": 4.506785869598389, |
| "learning_rate": 1.5500000000000002e-06, |
| "loss": 1.3157, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.015130023640661938, |
| "grad_norm": 4.007757186889648, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 1.1451, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.015602836879432624, |
| "grad_norm": 3.6621618270874023, |
| "learning_rate": 1.6500000000000003e-06, |
| "loss": 1.093, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.01607565011820331, |
| "grad_norm": 3.8733766078948975, |
| "learning_rate": 1.7000000000000002e-06, |
| "loss": 1.2289, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.016548463356973995, |
| "grad_norm": 4.3391900062561035, |
| "learning_rate": 1.75e-06, |
| "loss": 1.1453, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01702127659574468, |
| "grad_norm": 3.287623643875122, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 1.0257, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.017494089834515367, |
| "grad_norm": 3.591721773147583, |
| "learning_rate": 1.85e-06, |
| "loss": 0.9976, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.017966903073286054, |
| "grad_norm": 4.028271675109863, |
| "learning_rate": 1.9000000000000002e-06, |
| "loss": 1.0773, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.018439716312056736, |
| "grad_norm": 3.3543951511383057, |
| "learning_rate": 1.9500000000000004e-06, |
| "loss": 1.1677, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.018912529550827423, |
| "grad_norm": 3.807624340057373, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 1.1232, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01938534278959811, |
| "grad_norm": 4.242797374725342, |
| "learning_rate": 2.05e-06, |
| "loss": 1.1819, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.019858156028368795, |
| "grad_norm": 3.4574992656707764, |
| "learning_rate": 2.1000000000000002e-06, |
| "loss": 0.9878, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.02033096926713948, |
| "grad_norm": 3.906695604324341, |
| "learning_rate": 2.15e-06, |
| "loss": 1.0592, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.020803782505910164, |
| "grad_norm": 3.7543163299560547, |
| "learning_rate": 2.2e-06, |
| "loss": 1.0309, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.02127659574468085, |
| "grad_norm": 3.3777148723602295, |
| "learning_rate": 2.25e-06, |
| "loss": 1.0664, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.021749408983451537, |
| "grad_norm": 3.6003634929656982, |
| "learning_rate": 2.3000000000000004e-06, |
| "loss": 1.0482, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.022222222222222223, |
| "grad_norm": 3.3961377143859863, |
| "learning_rate": 2.35e-06, |
| "loss": 1.0252, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.02269503546099291, |
| "grad_norm": 3.1601035594940186, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 1.0435, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.023167848699763592, |
| "grad_norm": 3.4192967414855957, |
| "learning_rate": 2.4500000000000003e-06, |
| "loss": 1.0935, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.02364066193853428, |
| "grad_norm": 3.1225922107696533, |
| "learning_rate": 2.5e-06, |
| "loss": 0.8988, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.024113475177304965, |
| "grad_norm": 3.1423380374908447, |
| "learning_rate": 2.55e-06, |
| "loss": 1.0159, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.02458628841607565, |
| "grad_norm": 3.4782402515411377, |
| "learning_rate": 2.6e-06, |
| "loss": 1.0231, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.025059101654846337, |
| "grad_norm": 3.8362693786621094, |
| "learning_rate": 2.6500000000000005e-06, |
| "loss": 1.0725, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.02553191489361702, |
| "grad_norm": 3.033294916152954, |
| "learning_rate": 2.7000000000000004e-06, |
| "loss": 0.9377, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.026004728132387706, |
| "grad_norm": 3.849741220474243, |
| "learning_rate": 2.7500000000000004e-06, |
| "loss": 1.0046, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.026477541371158392, |
| "grad_norm": 3.141876220703125, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 0.9226, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.02695035460992908, |
| "grad_norm": 2.773594856262207, |
| "learning_rate": 2.85e-06, |
| "loss": 0.8662, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.027423167848699765, |
| "grad_norm": 3.1460225582122803, |
| "learning_rate": 2.9e-06, |
| "loss": 0.9304, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.027895981087470448, |
| "grad_norm": 3.293583631515503, |
| "learning_rate": 2.95e-06, |
| "loss": 1.0374, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.028368794326241134, |
| "grad_norm": 3.8190863132476807, |
| "learning_rate": 3e-06, |
| "loss": 0.971, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02884160756501182, |
| "grad_norm": 3.4566776752471924, |
| "learning_rate": 3.05e-06, |
| "loss": 0.9631, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.029314420803782507, |
| "grad_norm": 3.355741500854492, |
| "learning_rate": 3.1000000000000004e-06, |
| "loss": 1.0097, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.029787234042553193, |
| "grad_norm": 3.29746675491333, |
| "learning_rate": 3.1500000000000003e-06, |
| "loss": 0.9459, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.030260047281323876, |
| "grad_norm": 3.3122968673706055, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 0.8594, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.030732860520094562, |
| "grad_norm": 3.477701187133789, |
| "learning_rate": 3.2500000000000002e-06, |
| "loss": 0.9197, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.031205673758865248, |
| "grad_norm": 3.3363406658172607, |
| "learning_rate": 3.3000000000000006e-06, |
| "loss": 0.9478, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.03167848699763593, |
| "grad_norm": 4.143295764923096, |
| "learning_rate": 3.3500000000000005e-06, |
| "loss": 1.0534, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.03215130023640662, |
| "grad_norm": 3.2363274097442627, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 0.9454, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.032624113475177303, |
| "grad_norm": 3.198746681213379, |
| "learning_rate": 3.45e-06, |
| "loss": 0.9388, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.03309692671394799, |
| "grad_norm": 3.5751023292541504, |
| "learning_rate": 3.5e-06, |
| "loss": 0.9444, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.033569739952718676, |
| "grad_norm": 3.1745729446411133, |
| "learning_rate": 3.5500000000000003e-06, |
| "loss": 0.8683, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.03404255319148936, |
| "grad_norm": 3.3210883140563965, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 0.8811, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.03451536643026005, |
| "grad_norm": 3.2502429485321045, |
| "learning_rate": 3.65e-06, |
| "loss": 1.0012, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.034988179669030735, |
| "grad_norm": 3.44598126411438, |
| "learning_rate": 3.7e-06, |
| "loss": 0.9217, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.03546099290780142, |
| "grad_norm": 3.439117431640625, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 0.8976, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.03593380614657211, |
| "grad_norm": 3.523627758026123, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 0.8996, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.03640661938534279, |
| "grad_norm": 3.3716015815734863, |
| "learning_rate": 3.85e-06, |
| "loss": 0.9061, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.03687943262411347, |
| "grad_norm": 3.33518385887146, |
| "learning_rate": 3.900000000000001e-06, |
| "loss": 0.9371, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.03735224586288416, |
| "grad_norm": 3.833829879760742, |
| "learning_rate": 3.95e-06, |
| "loss": 0.9669, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.037825059101654845, |
| "grad_norm": 3.260446786880493, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.9449, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03829787234042553, |
| "grad_norm": 3.532451629638672, |
| "learning_rate": 4.05e-06, |
| "loss": 0.897, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.03877068557919622, |
| "grad_norm": 3.1156492233276367, |
| "learning_rate": 4.1e-06, |
| "loss": 0.8463, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.039243498817966904, |
| "grad_norm": 2.8801751136779785, |
| "learning_rate": 4.15e-06, |
| "loss": 0.8616, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.03971631205673759, |
| "grad_norm": 3.072476863861084, |
| "learning_rate": 4.2000000000000004e-06, |
| "loss": 0.8387, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.04018912529550828, |
| "grad_norm": 2.9601376056671143, |
| "learning_rate": 4.25e-06, |
| "loss": 0.8538, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.04066193853427896, |
| "grad_norm": 3.521664619445801, |
| "learning_rate": 4.3e-06, |
| "loss": 0.8894, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.04113475177304964, |
| "grad_norm": 3.2670981884002686, |
| "learning_rate": 4.350000000000001e-06, |
| "loss": 0.8387, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.04160756501182033, |
| "grad_norm": 3.422089099884033, |
| "learning_rate": 4.4e-06, |
| "loss": 0.7728, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.042080378250591015, |
| "grad_norm": 3.414034128189087, |
| "learning_rate": 4.450000000000001e-06, |
| "loss": 0.7968, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.0425531914893617, |
| "grad_norm": 4.234285354614258, |
| "learning_rate": 4.5e-06, |
| "loss": 0.8502, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.04302600472813239, |
| "grad_norm": 3.1446919441223145, |
| "learning_rate": 4.5500000000000005e-06, |
| "loss": 0.8236, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.043498817966903074, |
| "grad_norm": 3.683443307876587, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 0.9792, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.04397163120567376, |
| "grad_norm": 3.664219617843628, |
| "learning_rate": 4.65e-06, |
| "loss": 0.8743, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.044444444444444446, |
| "grad_norm": 3.369479179382324, |
| "learning_rate": 4.7e-06, |
| "loss": 0.8741, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.04491725768321513, |
| "grad_norm": 3.694949150085449, |
| "learning_rate": 4.75e-06, |
| "loss": 0.7574, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.04539007092198582, |
| "grad_norm": 3.5144498348236084, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 0.9934, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.0458628841607565, |
| "grad_norm": 3.164451837539673, |
| "learning_rate": 4.85e-06, |
| "loss": 0.7463, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.046335697399527184, |
| "grad_norm": 3.222785472869873, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 0.7698, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.04680851063829787, |
| "grad_norm": 2.9129555225372314, |
| "learning_rate": 4.95e-06, |
| "loss": 0.7856, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.04728132387706856, |
| "grad_norm": 3.5061235427856445, |
| "learning_rate": 5e-06, |
| "loss": 0.8588, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04775413711583924, |
| "grad_norm": 3.2805044651031494, |
| "learning_rate": 4.999999922167982e-06, |
| "loss": 0.7643, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.04822695035460993, |
| "grad_norm": 3.5461678504943848, |
| "learning_rate": 4.999999688671929e-06, |
| "loss": 0.8253, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.048699763593380616, |
| "grad_norm": 3.2238264083862305, |
| "learning_rate": 4.99999929951186e-06, |
| "loss": 0.7622, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.0491725768321513, |
| "grad_norm": 3.818955898284912, |
| "learning_rate": 4.999998754687795e-06, |
| "loss": 0.8471, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.04964539007092199, |
| "grad_norm": 3.1252424716949463, |
| "learning_rate": 4.99999805419977e-06, |
| "loss": 0.8409, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.050118203309692674, |
| "grad_norm": 3.604283571243286, |
| "learning_rate": 4.999997198047828e-06, |
| "loss": 0.9027, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.050591016548463354, |
| "grad_norm": 3.6752424240112305, |
| "learning_rate": 4.999996186232023e-06, |
| "loss": 0.9336, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.05106382978723404, |
| "grad_norm": 3.517557144165039, |
| "learning_rate": 4.9999950187524184e-06, |
| "loss": 0.8351, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.051536643026004726, |
| "grad_norm": 3.427285671234131, |
| "learning_rate": 4.999993695609085e-06, |
| "loss": 0.8457, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.05200945626477541, |
| "grad_norm": 3.2792510986328125, |
| "learning_rate": 4.999992216802107e-06, |
| "loss": 0.8391, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0524822695035461, |
| "grad_norm": 3.581094741821289, |
| "learning_rate": 4.999990582331576e-06, |
| "loss": 0.7533, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.052955082742316785, |
| "grad_norm": 3.1667377948760986, |
| "learning_rate": 4.999988792197593e-06, |
| "loss": 0.9562, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.05342789598108747, |
| "grad_norm": 3.3609890937805176, |
| "learning_rate": 4.99998684640027e-06, |
| "loss": 0.8181, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.05390070921985816, |
| "grad_norm": 3.260627269744873, |
| "learning_rate": 4.999984744939729e-06, |
| "loss": 0.8012, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.054373522458628844, |
| "grad_norm": 3.4535653591156006, |
| "learning_rate": 4.9999824878160985e-06, |
| "loss": 0.919, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.05484633569739953, |
| "grad_norm": 3.4880740642547607, |
| "learning_rate": 4.999980075029522e-06, |
| "loss": 0.8114, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.05531914893617021, |
| "grad_norm": 3.2546932697296143, |
| "learning_rate": 4.999977506580147e-06, |
| "loss": 0.8274, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.055791962174940896, |
| "grad_norm": 3.2762744426727295, |
| "learning_rate": 4.999974782468136e-06, |
| "loss": 0.9018, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.05626477541371158, |
| "grad_norm": 3.42825984954834, |
| "learning_rate": 4.999971902693657e-06, |
| "loss": 0.8262, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.05673758865248227, |
| "grad_norm": 3.082496404647827, |
| "learning_rate": 4.99996886725689e-06, |
| "loss": 0.8181, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.057210401891252954, |
| "grad_norm": 3.322869300842285, |
| "learning_rate": 4.9999656761580225e-06, |
| "loss": 0.8382, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.05768321513002364, |
| "grad_norm": 3.6365339756011963, |
| "learning_rate": 4.9999623293972555e-06, |
| "loss": 0.7489, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.05815602836879433, |
| "grad_norm": 3.376352548599243, |
| "learning_rate": 4.999958826974796e-06, |
| "loss": 0.9012, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.05862884160756501, |
| "grad_norm": 3.49088716506958, |
| "learning_rate": 4.999955168890862e-06, |
| "loss": 0.8999, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.0591016548463357, |
| "grad_norm": 3.3265068531036377, |
| "learning_rate": 4.999951355145682e-06, |
| "loss": 0.8161, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.059574468085106386, |
| "grad_norm": 3.697282314300537, |
| "learning_rate": 4.999947385739493e-06, |
| "loss": 0.9623, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.06004728132387707, |
| "grad_norm": 2.7901928424835205, |
| "learning_rate": 4.999943260672542e-06, |
| "loss": 0.7371, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.06052009456264775, |
| "grad_norm": 3.110319137573242, |
| "learning_rate": 4.999938979945086e-06, |
| "loss": 0.715, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.06099290780141844, |
| "grad_norm": 3.2211520671844482, |
| "learning_rate": 4.999934543557392e-06, |
| "loss": 0.8888, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.061465721040189124, |
| "grad_norm": 3.2466187477111816, |
| "learning_rate": 4.999929951509735e-06, |
| "loss": 0.9389, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.06193853427895981, |
| "grad_norm": 3.3574399948120117, |
| "learning_rate": 4.999925203802403e-06, |
| "loss": 0.8263, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.062411347517730496, |
| "grad_norm": 3.275601625442505, |
| "learning_rate": 4.99992030043569e-06, |
| "loss": 0.8338, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.06288416075650118, |
| "grad_norm": 3.6011312007904053, |
| "learning_rate": 4.999915241409902e-06, |
| "loss": 0.8351, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.06335697399527186, |
| "grad_norm": 2.969011068344116, |
| "learning_rate": 4.999910026725352e-06, |
| "loss": 0.79, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.06382978723404255, |
| "grad_norm": 3.690784454345703, |
| "learning_rate": 4.999904656382369e-06, |
| "loss": 0.8209, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.06430260047281323, |
| "grad_norm": 3.3363115787506104, |
| "learning_rate": 4.999899130381283e-06, |
| "loss": 0.858, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.06477541371158392, |
| "grad_norm": 3.206881523132324, |
| "learning_rate": 4.9998934487224405e-06, |
| "loss": 0.834, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.06524822695035461, |
| "grad_norm": 2.773146152496338, |
| "learning_rate": 4.999887611406195e-06, |
| "loss": 0.7576, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.0657210401891253, |
| "grad_norm": 3.307725667953491, |
| "learning_rate": 4.999881618432908e-06, |
| "loss": 0.7487, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.06619385342789598, |
| "grad_norm": 4.273657321929932, |
| "learning_rate": 4.999875469802956e-06, |
| "loss": 0.8176, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.06666666666666667, |
| "grad_norm": 3.0898005962371826, |
| "learning_rate": 4.999869165516719e-06, |
| "loss": 0.7578, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.06713947990543735, |
| "grad_norm": 3.25150990486145, |
| "learning_rate": 4.9998627055745915e-06, |
| "loss": 0.7873, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.06761229314420804, |
| "grad_norm": 2.9705755710601807, |
| "learning_rate": 4.999856089976974e-06, |
| "loss": 0.6473, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.06808510638297872, |
| "grad_norm": 3.5658507347106934, |
| "learning_rate": 4.9998493187242804e-06, |
| "loss": 0.855, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.06855791962174941, |
| "grad_norm": 3.3994076251983643, |
| "learning_rate": 4.99984239181693e-06, |
| "loss": 0.7926, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.0690307328605201, |
| "grad_norm": 2.8266260623931885, |
| "learning_rate": 4.999835309255357e-06, |
| "loss": 0.7564, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.06950354609929078, |
| "grad_norm": 3.1143875122070312, |
| "learning_rate": 4.999828071039999e-06, |
| "loss": 0.8398, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.06997635933806147, |
| "grad_norm": 2.9364278316497803, |
| "learning_rate": 4.99982067717131e-06, |
| "loss": 0.7381, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.07044917257683216, |
| "grad_norm": 3.4155616760253906, |
| "learning_rate": 4.999813127649748e-06, |
| "loss": 0.7933, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.07092198581560284, |
| "grad_norm": 4.371236324310303, |
| "learning_rate": 4.999805422475784e-06, |
| "loss": 0.8292, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.07139479905437353, |
| "grad_norm": 3.3967185020446777, |
| "learning_rate": 4.999797561649897e-06, |
| "loss": 0.8712, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.07186761229314421, |
| "grad_norm": 3.343303680419922, |
| "learning_rate": 4.999789545172578e-06, |
| "loss": 0.8177, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.07234042553191489, |
| "grad_norm": 3.040235757827759, |
| "learning_rate": 4.999781373044325e-06, |
| "loss": 0.7379, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.07281323877068557, |
| "grad_norm": 3.4069204330444336, |
| "learning_rate": 4.999773045265647e-06, |
| "loss": 0.7939, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.07328605200945626, |
| "grad_norm": 3.1939475536346436, |
| "learning_rate": 4.999764561837063e-06, |
| "loss": 0.8037, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.07375886524822695, |
| "grad_norm": 4.452004909515381, |
| "learning_rate": 4.999755922759101e-06, |
| "loss": 0.8421, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.07423167848699763, |
| "grad_norm": 3.2031240463256836, |
| "learning_rate": 4.999747128032298e-06, |
| "loss": 0.794, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.07470449172576832, |
| "grad_norm": 3.175920009613037, |
| "learning_rate": 4.999738177657203e-06, |
| "loss": 0.759, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.075177304964539, |
| "grad_norm": 3.7679688930511475, |
| "learning_rate": 4.9997290716343725e-06, |
| "loss": 0.8174, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.07565011820330969, |
| "grad_norm": 3.7020037174224854, |
| "learning_rate": 4.999719809964373e-06, |
| "loss": 0.7116, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.07612293144208038, |
| "grad_norm": 4.357471942901611, |
| "learning_rate": 4.999710392647783e-06, |
| "loss": 0.7649, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.07659574468085106, |
| "grad_norm": 3.3439087867736816, |
| "learning_rate": 4.999700819685187e-06, |
| "loss": 0.7907, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.07706855791962175, |
| "grad_norm": 3.210815191268921, |
| "learning_rate": 4.999691091077182e-06, |
| "loss": 0.8446, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.07754137115839244, |
| "grad_norm": 3.1029553413391113, |
| "learning_rate": 4.9996812068243735e-06, |
| "loss": 0.7232, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.07801418439716312, |
| "grad_norm": 2.9389400482177734, |
| "learning_rate": 4.999671166927378e-06, |
| "loss": 0.7413, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.07848699763593381, |
| "grad_norm": 3.7062697410583496, |
| "learning_rate": 4.9996609713868185e-06, |
| "loss": 0.8773, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.0789598108747045, |
| "grad_norm": 3.2768924236297607, |
| "learning_rate": 4.999650620203332e-06, |
| "loss": 0.8046, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.07943262411347518, |
| "grad_norm": 3.380373001098633, |
| "learning_rate": 4.999640113377561e-06, |
| "loss": 0.7529, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.07990543735224587, |
| "grad_norm": 3.520022392272949, |
| "learning_rate": 4.999629450910162e-06, |
| "loss": 0.7352, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.08037825059101655, |
| "grad_norm": 3.43269419670105, |
| "learning_rate": 4.999618632801796e-06, |
| "loss": 0.9371, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.08085106382978724, |
| "grad_norm": 3.555877923965454, |
| "learning_rate": 4.99960765905314e-06, |
| "loss": 0.8276, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.08132387706855793, |
| "grad_norm": 3.597050189971924, |
| "learning_rate": 4.999596529664874e-06, |
| "loss": 0.8164, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.0817966903073286, |
| "grad_norm": 3.2002956867218018, |
| "learning_rate": 4.999585244637693e-06, |
| "loss": 0.7824, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.08226950354609928, |
| "grad_norm": 3.527275562286377, |
| "learning_rate": 4.999573803972299e-06, |
| "loss": 0.8033, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.08274231678486997, |
| "grad_norm": 3.5184452533721924, |
| "learning_rate": 4.999562207669405e-06, |
| "loss": 0.724, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.08321513002364066, |
| "grad_norm": 3.6635067462921143, |
| "learning_rate": 4.999550455729732e-06, |
| "loss": 0.819, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.08368794326241134, |
| "grad_norm": 3.192399740219116, |
| "learning_rate": 4.999538548154012e-06, |
| "loss": 0.7999, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.08416075650118203, |
| "grad_norm": 3.0946953296661377, |
| "learning_rate": 4.999526484942988e-06, |
| "loss": 0.7367, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.08463356973995272, |
| "grad_norm": 2.847198009490967, |
| "learning_rate": 4.99951426609741e-06, |
| "loss": 0.7536, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.0851063829787234, |
| "grad_norm": 2.7674827575683594, |
| "learning_rate": 4.999501891618037e-06, |
| "loss": 0.701, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.08557919621749409, |
| "grad_norm": 3.357933521270752, |
| "learning_rate": 4.999489361505643e-06, |
| "loss": 0.8331, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.08605200945626477, |
| "grad_norm": 3.1464426517486572, |
| "learning_rate": 4.999476675761004e-06, |
| "loss": 0.7931, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.08652482269503546, |
| "grad_norm": 3.310697078704834, |
| "learning_rate": 4.999463834384915e-06, |
| "loss": 0.753, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.08699763593380615, |
| "grad_norm": 2.9794881343841553, |
| "learning_rate": 4.999450837378171e-06, |
| "loss": 0.7091, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.08747044917257683, |
| "grad_norm": 3.0776889324188232, |
| "learning_rate": 4.999437684741584e-06, |
| "loss": 0.7226, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.08794326241134752, |
| "grad_norm": 3.6657519340515137, |
| "learning_rate": 4.999424376475972e-06, |
| "loss": 0.845, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.0884160756501182, |
| "grad_norm": 3.872718572616577, |
| "learning_rate": 4.999410912582164e-06, |
| "loss": 0.812, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.08888888888888889, |
| "grad_norm": 2.9184508323669434, |
| "learning_rate": 4.9993972930609976e-06, |
| "loss": 0.6823, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.08936170212765958, |
| "grad_norm": 3.5567142963409424, |
| "learning_rate": 4.999383517913321e-06, |
| "loss": 0.7614, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.08983451536643026, |
| "grad_norm": 3.3688533306121826, |
| "learning_rate": 4.999369587139992e-06, |
| "loss": 0.858, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.09030732860520095, |
| "grad_norm": 2.893223524093628, |
| "learning_rate": 4.99935550074188e-06, |
| "loss": 0.6761, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.09078014184397164, |
| "grad_norm": 3.400225877761841, |
| "learning_rate": 4.999341258719859e-06, |
| "loss": 0.7531, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.09125295508274232, |
| "grad_norm": 3.6167714595794678, |
| "learning_rate": 4.999326861074817e-06, |
| "loss": 0.8164, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.091725768321513, |
| "grad_norm": 4.325016498565674, |
| "learning_rate": 4.9993123078076506e-06, |
| "loss": 0.7069, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.09219858156028368, |
| "grad_norm": 3.195317029953003, |
| "learning_rate": 4.999297598919266e-06, |
| "loss": 0.726, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.09267139479905437, |
| "grad_norm": 3.146530866622925, |
| "learning_rate": 4.999282734410579e-06, |
| "loss": 0.7888, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.09314420803782505, |
| "grad_norm": 3.5166752338409424, |
| "learning_rate": 4.999267714282515e-06, |
| "loss": 0.8473, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.09361702127659574, |
| "grad_norm": 3.3140196800231934, |
| "learning_rate": 4.99925253853601e-06, |
| "loss": 0.7233, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.09408983451536643, |
| "grad_norm": 3.0318164825439453, |
| "learning_rate": 4.999237207172008e-06, |
| "loss": 0.7543, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.09456264775413711, |
| "grad_norm": 3.662214756011963, |
| "learning_rate": 4.999221720191464e-06, |
| "loss": 0.7783, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0950354609929078, |
| "grad_norm": 3.452078104019165, |
| "learning_rate": 4.9992060775953425e-06, |
| "loss": 0.7868, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.09550827423167849, |
| "grad_norm": 3.4051287174224854, |
| "learning_rate": 4.999190279384617e-06, |
| "loss": 0.7849, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.09598108747044917, |
| "grad_norm": 3.1377196311950684, |
| "learning_rate": 4.999174325560271e-06, |
| "loss": 0.8364, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.09645390070921986, |
| "grad_norm": 3.129473924636841, |
| "learning_rate": 4.999158216123299e-06, |
| "loss": 0.7458, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.09692671394799054, |
| "grad_norm": 3.169548749923706, |
| "learning_rate": 4.999141951074703e-06, |
| "loss": 0.7256, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.09739952718676123, |
| "grad_norm": 3.186009168624878, |
| "learning_rate": 4.999125530415495e-06, |
| "loss": 0.783, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.09787234042553192, |
| "grad_norm": 3.0995123386383057, |
| "learning_rate": 4.9991089541467e-06, |
| "loss": 0.7519, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.0983451536643026, |
| "grad_norm": 3.1854088306427, |
| "learning_rate": 4.999092222269348e-06, |
| "loss": 0.7444, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.09881796690307329, |
| "grad_norm": 3.1512246131896973, |
| "learning_rate": 4.999075334784482e-06, |
| "loss": 0.7882, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.09929078014184398, |
| "grad_norm": 3.6199698448181152, |
| "learning_rate": 4.999058291693153e-06, |
| "loss": 0.8048, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.09976359338061466, |
| "grad_norm": 2.956907272338867, |
| "learning_rate": 4.999041092996422e-06, |
| "loss": 0.7663, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.10023640661938535, |
| "grad_norm": 3.3493971824645996, |
| "learning_rate": 4.99902373869536e-06, |
| "loss": 0.7639, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.10070921985815603, |
| "grad_norm": 3.144812822341919, |
| "learning_rate": 4.9990062287910475e-06, |
| "loss": 0.7953, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.10118203309692671, |
| "grad_norm": 3.5986971855163574, |
| "learning_rate": 4.998988563284576e-06, |
| "loss": 0.8297, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.1016548463356974, |
| "grad_norm": 3.447584867477417, |
| "learning_rate": 4.998970742177044e-06, |
| "loss": 0.808, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.10212765957446808, |
| "grad_norm": 3.791353940963745, |
| "learning_rate": 4.998952765469562e-06, |
| "loss": 0.8005, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.10260047281323877, |
| "grad_norm": 3.4490807056427, |
| "learning_rate": 4.998934633163247e-06, |
| "loss": 0.8135, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.10307328605200945, |
| "grad_norm": 3.1053314208984375, |
| "learning_rate": 4.998916345259232e-06, |
| "loss": 0.7888, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.10354609929078014, |
| "grad_norm": 3.407862663269043, |
| "learning_rate": 4.9988979017586514e-06, |
| "loss": 0.7099, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.10401891252955082, |
| "grad_norm": 3.116656541824341, |
| "learning_rate": 4.998879302662658e-06, |
| "loss": 0.8344, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.10449172576832151, |
| "grad_norm": 3.339264154434204, |
| "learning_rate": 4.998860547972406e-06, |
| "loss": 0.8496, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.1049645390070922, |
| "grad_norm": 3.251892566680908, |
| "learning_rate": 4.998841637689066e-06, |
| "loss": 0.7455, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.10543735224586288, |
| "grad_norm": 4.098135471343994, |
| "learning_rate": 4.998822571813814e-06, |
| "loss": 0.7772, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.10591016548463357, |
| "grad_norm": 3.9871134757995605, |
| "learning_rate": 4.998803350347837e-06, |
| "loss": 0.8261, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.10638297872340426, |
| "grad_norm": 3.2822303771972656, |
| "learning_rate": 4.998783973292333e-06, |
| "loss": 0.8623, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.10685579196217494, |
| "grad_norm": 3.0356857776641846, |
| "learning_rate": 4.998764440648507e-06, |
| "loss": 0.7426, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.10732860520094563, |
| "grad_norm": 2.8932785987854004, |
| "learning_rate": 4.998744752417576e-06, |
| "loss": 0.6741, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.10780141843971631, |
| "grad_norm": 3.085820436477661, |
| "learning_rate": 4.998724908600767e-06, |
| "loss": 0.6549, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.108274231678487, |
| "grad_norm": 3.135829210281372, |
| "learning_rate": 4.998704909199314e-06, |
| "loss": 0.6702, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.10874704491725769, |
| "grad_norm": 5.016134262084961, |
| "learning_rate": 4.9986847542144625e-06, |
| "loss": 0.7852, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.10921985815602837, |
| "grad_norm": 3.9056200981140137, |
| "learning_rate": 4.998664443647468e-06, |
| "loss": 0.9654, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.10969267139479906, |
| "grad_norm": 3.0880749225616455, |
| "learning_rate": 4.998643977499595e-06, |
| "loss": 0.7579, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.11016548463356975, |
| "grad_norm": 3.6893601417541504, |
| "learning_rate": 4.998623355772118e-06, |
| "loss": 0.713, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.11063829787234042, |
| "grad_norm": 4.181536674499512, |
| "learning_rate": 4.998602578466319e-06, |
| "loss": 0.7331, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.1111111111111111, |
| "grad_norm": 3.036386728286743, |
| "learning_rate": 4.998581645583496e-06, |
| "loss": 0.7115, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.11158392434988179, |
| "grad_norm": 3.6333255767822266, |
| "learning_rate": 4.998560557124948e-06, |
| "loss": 0.7544, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.11205673758865248, |
| "grad_norm": 2.926417827606201, |
| "learning_rate": 4.9985393130919915e-06, |
| "loss": 0.715, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.11252955082742316, |
| "grad_norm": 2.969158172607422, |
| "learning_rate": 4.998517913485946e-06, |
| "loss": 0.7304, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.11300236406619385, |
| "grad_norm": 3.5254971981048584, |
| "learning_rate": 4.9984963583081466e-06, |
| "loss": 0.7725, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.11347517730496454, |
| "grad_norm": 3.7840335369110107, |
| "learning_rate": 4.998474647559936e-06, |
| "loss": 0.8685, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.11394799054373522, |
| "grad_norm": 3.0333125591278076, |
| "learning_rate": 4.9984527812426625e-06, |
| "loss": 0.7793, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.11442080378250591, |
| "grad_norm": 3.290159225463867, |
| "learning_rate": 4.99843075935769e-06, |
| "loss": 0.7158, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.1148936170212766, |
| "grad_norm": 3.3935494422912598, |
| "learning_rate": 4.99840858190639e-06, |
| "loss": 0.7643, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.11536643026004728, |
| "grad_norm": 3.333965539932251, |
| "learning_rate": 4.998386248890142e-06, |
| "loss": 0.7255, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.11583924349881797, |
| "grad_norm": 2.8129613399505615, |
| "learning_rate": 4.998363760310339e-06, |
| "loss": 0.768, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.11631205673758865, |
| "grad_norm": 2.8678107261657715, |
| "learning_rate": 4.998341116168378e-06, |
| "loss": 0.7403, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.11678486997635934, |
| "grad_norm": 2.8898239135742188, |
| "learning_rate": 4.998318316465672e-06, |
| "loss": 0.6844, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.11725768321513003, |
| "grad_norm": 3.139777898788452, |
| "learning_rate": 4.998295361203637e-06, |
| "loss": 0.7936, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.11773049645390071, |
| "grad_norm": 3.393721103668213, |
| "learning_rate": 4.998272250383707e-06, |
| "loss": 0.8173, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.1182033096926714, |
| "grad_norm": 3.240973949432373, |
| "learning_rate": 4.998248984007318e-06, |
| "loss": 0.8252, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.11867612293144209, |
| "grad_norm": 3.384855031967163, |
| "learning_rate": 4.998225562075918e-06, |
| "loss": 0.7244, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.11914893617021277, |
| "grad_norm": 3.1881816387176514, |
| "learning_rate": 4.9982019845909675e-06, |
| "loss": 0.6818, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.11962174940898346, |
| "grad_norm": 2.888364553451538, |
| "learning_rate": 4.998178251553934e-06, |
| "loss": 0.6753, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.12009456264775414, |
| "grad_norm": 3.630093812942505, |
| "learning_rate": 4.9981543629662944e-06, |
| "loss": 0.7995, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.12056737588652482, |
| "grad_norm": 2.9820947647094727, |
| "learning_rate": 4.998130318829537e-06, |
| "loss": 0.7478, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.1210401891252955, |
| "grad_norm": 2.7094738483428955, |
| "learning_rate": 4.998106119145159e-06, |
| "loss": 0.7237, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.12151300236406619, |
| "grad_norm": 3.1808104515075684, |
| "learning_rate": 4.9980817639146665e-06, |
| "loss": 0.7915, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.12198581560283688, |
| "grad_norm": 3.1661291122436523, |
| "learning_rate": 4.998057253139575e-06, |
| "loss": 0.8053, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.12245862884160756, |
| "grad_norm": 3.528749942779541, |
| "learning_rate": 4.998032586821413e-06, |
| "loss": 0.7946, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.12293144208037825, |
| "grad_norm": 3.125964879989624, |
| "learning_rate": 4.998007764961716e-06, |
| "loss": 0.7569, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.12340425531914893, |
| "grad_norm": 3.0778942108154297, |
| "learning_rate": 4.997982787562029e-06, |
| "loss": 0.7184, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.12387706855791962, |
| "grad_norm": 3.3531930446624756, |
| "learning_rate": 4.997957654623906e-06, |
| "loss": 0.7586, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.1243498817966903, |
| "grad_norm": 3.229278564453125, |
| "learning_rate": 4.997932366148913e-06, |
| "loss": 0.6092, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.12482269503546099, |
| "grad_norm": 3.7286155223846436, |
| "learning_rate": 4.997906922138626e-06, |
| "loss": 0.7965, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.12529550827423167, |
| "grad_norm": 3.300311803817749, |
| "learning_rate": 4.997881322594628e-06, |
| "loss": 0.7665, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.12576832151300235, |
| "grad_norm": 3.411482572555542, |
| "learning_rate": 4.9978555675185115e-06, |
| "loss": 0.7253, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.12624113475177304, |
| "grad_norm": 3.0884511470794678, |
| "learning_rate": 4.9978296569118825e-06, |
| "loss": 0.659, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.12671394799054372, |
| "grad_norm": 3.0652925968170166, |
| "learning_rate": 4.9978035907763535e-06, |
| "loss": 0.6739, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.1271867612293144, |
| "grad_norm": 3.280555009841919, |
| "learning_rate": 4.997777369113547e-06, |
| "loss": 0.8003, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.1276595744680851, |
| "grad_norm": 2.980860948562622, |
| "learning_rate": 4.997750991925096e-06, |
| "loss": 0.7097, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.12813238770685578, |
| "grad_norm": 3.301760673522949, |
| "learning_rate": 4.997724459212644e-06, |
| "loss": 0.7894, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.12860520094562647, |
| "grad_norm": 2.9584903717041016, |
| "learning_rate": 4.997697770977841e-06, |
| "loss": 0.733, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.12907801418439716, |
| "grad_norm": 3.5632214546203613, |
| "learning_rate": 4.99767092722235e-06, |
| "loss": 0.7228, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.12955082742316784, |
| "grad_norm": 3.5900983810424805, |
| "learning_rate": 4.997643927947843e-06, |
| "loss": 0.7634, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.13002364066193853, |
| "grad_norm": 3.332650661468506, |
| "learning_rate": 4.997616773156e-06, |
| "loss": 0.797, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.13049645390070921, |
| "grad_norm": 3.1094167232513428, |
| "learning_rate": 4.997589462848512e-06, |
| "loss": 0.7849, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.1309692671394799, |
| "grad_norm": 3.5359463691711426, |
| "learning_rate": 4.99756199702708e-06, |
| "loss": 0.6871, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.1314420803782506, |
| "grad_norm": 3.190441846847534, |
| "learning_rate": 4.997534375693414e-06, |
| "loss": 0.6883, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.13191489361702127, |
| "grad_norm": 3.063518762588501, |
| "learning_rate": 4.997506598849234e-06, |
| "loss": 0.7586, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.13238770685579196, |
| "grad_norm": 3.4112050533294678, |
| "learning_rate": 4.997478666496269e-06, |
| "loss": 0.796, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.13286052009456265, |
| "grad_norm": 3.231886386871338, |
| "learning_rate": 4.997450578636259e-06, |
| "loss": 0.7714, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.13333333333333333, |
| "grad_norm": 3.279425621032715, |
| "learning_rate": 4.9974223352709515e-06, |
| "loss": 0.7793, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.13380614657210402, |
| "grad_norm": 3.2154316902160645, |
| "learning_rate": 4.9973939364021075e-06, |
| "loss": 0.791, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.1342789598108747, |
| "grad_norm": 3.2090768814086914, |
| "learning_rate": 4.9973653820314925e-06, |
| "loss": 0.6433, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.1347517730496454, |
| "grad_norm": 3.1712026596069336, |
| "learning_rate": 4.997336672160886e-06, |
| "loss": 0.8128, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.13522458628841608, |
| "grad_norm": 2.929229497909546, |
| "learning_rate": 4.997307806792076e-06, |
| "loss": 0.7594, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.13569739952718676, |
| "grad_norm": 3.0363314151763916, |
| "learning_rate": 4.997278785926859e-06, |
| "loss": 0.7336, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.13617021276595745, |
| "grad_norm": 3.1352357864379883, |
| "learning_rate": 4.997249609567042e-06, |
| "loss": 0.7225, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.13664302600472814, |
| "grad_norm": 3.3171157836914062, |
| "learning_rate": 4.997220277714442e-06, |
| "loss": 0.7777, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.13711583924349882, |
| "grad_norm": 3.050717353820801, |
| "learning_rate": 4.997190790370885e-06, |
| "loss": 0.6836, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.1375886524822695, |
| "grad_norm": 3.0297694206237793, |
| "learning_rate": 4.997161147538208e-06, |
| "loss": 0.6883, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.1380614657210402, |
| "grad_norm": 3.0566554069519043, |
| "learning_rate": 4.997131349218256e-06, |
| "loss": 0.6674, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.13853427895981088, |
| "grad_norm": 3.799111843109131, |
| "learning_rate": 4.997101395412885e-06, |
| "loss": 0.8256, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.13900709219858157, |
| "grad_norm": 3.1394248008728027, |
| "learning_rate": 4.9970712861239576e-06, |
| "loss": 0.7306, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.13947990543735225, |
| "grad_norm": 3.0605666637420654, |
| "learning_rate": 4.997041021353352e-06, |
| "loss": 0.7212, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.13995271867612294, |
| "grad_norm": 3.8813397884368896, |
| "learning_rate": 4.997010601102951e-06, |
| "loss": 0.769, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.14042553191489363, |
| "grad_norm": 3.0514819622039795, |
| "learning_rate": 4.996980025374649e-06, |
| "loss": 0.7422, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.1408983451536643, |
| "grad_norm": 2.9544146060943604, |
| "learning_rate": 4.99694929417035e-06, |
| "loss": 0.6912, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.141371158392435, |
| "grad_norm": 3.2635602951049805, |
| "learning_rate": 4.996918407491966e-06, |
| "loss": 0.7395, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.14184397163120568, |
| "grad_norm": 3.373882532119751, |
| "learning_rate": 4.996887365341423e-06, |
| "loss": 0.7799, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.14231678486997637, |
| "grad_norm": 3.001128673553467, |
| "learning_rate": 4.996856167720652e-06, |
| "loss": 0.7168, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.14278959810874706, |
| "grad_norm": 3.1026835441589355, |
| "learning_rate": 4.996824814631595e-06, |
| "loss": 0.7492, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.14326241134751774, |
| "grad_norm": 3.41947603225708, |
| "learning_rate": 4.996793306076205e-06, |
| "loss": 0.6659, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.14373522458628843, |
| "grad_norm": 3.2272400856018066, |
| "learning_rate": 4.996761642056444e-06, |
| "loss": 0.7184, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.14420803782505912, |
| "grad_norm": 2.9488935470581055, |
| "learning_rate": 4.996729822574284e-06, |
| "loss": 0.7451, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.14468085106382977, |
| "grad_norm": 3.268231153488159, |
| "learning_rate": 4.9966978476317065e-06, |
| "loss": 0.7798, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.14515366430260046, |
| "grad_norm": 3.9086556434631348, |
| "learning_rate": 4.996665717230701e-06, |
| "loss": 0.7871, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.14562647754137115, |
| "grad_norm": 3.3483879566192627, |
| "learning_rate": 4.996633431373269e-06, |
| "loss": 0.7415, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.14609929078014183, |
| "grad_norm": 2.839400053024292, |
| "learning_rate": 4.99660099006142e-06, |
| "loss": 0.7192, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.14657210401891252, |
| "grad_norm": 3.177302598953247, |
| "learning_rate": 4.996568393297175e-06, |
| "loss": 0.755, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.1470449172576832, |
| "grad_norm": 3.5477044582366943, |
| "learning_rate": 4.996535641082563e-06, |
| "loss": 0.7531, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.1475177304964539, |
| "grad_norm": 3.418576717376709, |
| "learning_rate": 4.996502733419624e-06, |
| "loss": 0.8009, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.14799054373522458, |
| "grad_norm": 3.711341619491577, |
| "learning_rate": 4.996469670310407e-06, |
| "loss": 0.7362, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.14846335697399526, |
| "grad_norm": 3.2419373989105225, |
| "learning_rate": 4.99643645175697e-06, |
| "loss": 0.7761, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.14893617021276595, |
| "grad_norm": 3.121858835220337, |
| "learning_rate": 4.996403077761381e-06, |
| "loss": 0.6495, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.14940898345153664, |
| "grad_norm": 3.123054265975952, |
| "learning_rate": 4.996369548325719e-06, |
| "loss": 0.7444, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.14988179669030732, |
| "grad_norm": 2.780880928039551, |
| "learning_rate": 4.996335863452072e-06, |
| "loss": 0.672, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.150354609929078, |
| "grad_norm": 3.3738629817962646, |
| "learning_rate": 4.996302023142536e-06, |
| "loss": 0.7972, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.1508274231678487, |
| "grad_norm": 3.4874777793884277, |
| "learning_rate": 4.99626802739922e-06, |
| "loss": 0.8252, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.15130023640661938, |
| "grad_norm": 3.7074787616729736, |
| "learning_rate": 4.9962338762242395e-06, |
| "loss": 0.8216, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.15177304964539007, |
| "grad_norm": 3.281912326812744, |
| "learning_rate": 4.996199569619721e-06, |
| "loss": 0.8175, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.15224586288416075, |
| "grad_norm": 2.9485340118408203, |
| "learning_rate": 4.996165107587801e-06, |
| "loss": 0.707, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.15271867612293144, |
| "grad_norm": 3.3757646083831787, |
| "learning_rate": 4.996130490130625e-06, |
| "loss": 0.7955, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.15319148936170213, |
| "grad_norm": 2.962181568145752, |
| "learning_rate": 4.996095717250349e-06, |
| "loss": 0.7067, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.1536643026004728, |
| "grad_norm": 3.114272356033325, |
| "learning_rate": 4.996060788949136e-06, |
| "loss": 0.7486, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.1541371158392435, |
| "grad_norm": 3.0621590614318848, |
| "learning_rate": 4.996025705229165e-06, |
| "loss": 0.6547, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.15460992907801419, |
| "grad_norm": 2.8745882511138916, |
| "learning_rate": 4.995990466092616e-06, |
| "loss": 0.6435, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.15508274231678487, |
| "grad_norm": 2.90841007232666, |
| "learning_rate": 4.995955071541686e-06, |
| "loss": 0.7331, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.15555555555555556, |
| "grad_norm": 2.694580316543579, |
| "learning_rate": 4.9959195215785784e-06, |
| "loss": 0.6731, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.15602836879432624, |
| "grad_norm": 3.158083438873291, |
| "learning_rate": 4.995883816205507e-06, |
| "loss": 0.7257, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.15650118203309693, |
| "grad_norm": 3.3234715461730957, |
| "learning_rate": 4.995847955424694e-06, |
| "loss": 0.7389, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.15697399527186762, |
| "grad_norm": 2.9406495094299316, |
| "learning_rate": 4.995811939238373e-06, |
| "loss": 0.643, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.1574468085106383, |
| "grad_norm": 3.3191726207733154, |
| "learning_rate": 4.995775767648785e-06, |
| "loss": 0.7879, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.157919621749409, |
| "grad_norm": 3.711925745010376, |
| "learning_rate": 4.995739440658185e-06, |
| "loss": 0.7586, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.15839243498817968, |
| "grad_norm": 9.573421478271484, |
| "learning_rate": 4.995702958268833e-06, |
| "loss": 0.7842, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.15886524822695036, |
| "grad_norm": 3.4154508113861084, |
| "learning_rate": 4.995666320483001e-06, |
| "loss": 0.6735, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.15933806146572105, |
| "grad_norm": 3.4169859886169434, |
| "learning_rate": 4.995629527302971e-06, |
| "loss": 0.741, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.15981087470449173, |
| "grad_norm": 3.287503242492676, |
| "learning_rate": 4.9955925787310335e-06, |
| "loss": 0.7139, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.16028368794326242, |
| "grad_norm": 3.288409471511841, |
| "learning_rate": 4.995555474769488e-06, |
| "loss": 0.7636, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.1607565011820331, |
| "grad_norm": 2.8021693229675293, |
| "learning_rate": 4.995518215420646e-06, |
| "loss": 0.5883, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.1612293144208038, |
| "grad_norm": 2.7038564682006836, |
| "learning_rate": 4.995480800686827e-06, |
| "loss": 0.657, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.16170212765957448, |
| "grad_norm": 3.2370235919952393, |
| "learning_rate": 4.9954432305703615e-06, |
| "loss": 0.6999, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.16217494089834517, |
| "grad_norm": 2.8666412830352783, |
| "learning_rate": 4.995405505073588e-06, |
| "loss": 0.7199, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.16264775413711585, |
| "grad_norm": 3.6467232704162598, |
| "learning_rate": 4.995367624198856e-06, |
| "loss": 0.7317, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.16312056737588654, |
| "grad_norm": 2.7576327323913574, |
| "learning_rate": 4.9953295879485246e-06, |
| "loss": 0.647, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.1635933806146572, |
| "grad_norm": 2.922232151031494, |
| "learning_rate": 4.995291396324959e-06, |
| "loss": 0.6686, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.16406619385342788, |
| "grad_norm": 2.8693501949310303, |
| "learning_rate": 4.995253049330542e-06, |
| "loss": 0.6756, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.16453900709219857, |
| "grad_norm": 3.671865701675415, |
| "learning_rate": 4.995214546967658e-06, |
| "loss": 0.7347, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.16501182033096926, |
| "grad_norm": 3.024219274520874, |
| "learning_rate": 4.995175889238706e-06, |
| "loss": 0.7547, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.16548463356973994, |
| "grad_norm": 2.8470778465270996, |
| "learning_rate": 4.995137076146091e-06, |
| "loss": 0.6764, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.16595744680851063, |
| "grad_norm": 2.905057907104492, |
| "learning_rate": 4.9950981076922324e-06, |
| "loss": 0.6814, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.16643026004728131, |
| "grad_norm": 3.504377841949463, |
| "learning_rate": 4.995058983879555e-06, |
| "loss": 0.7145, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.166903073286052, |
| "grad_norm": 3.0029661655426025, |
| "learning_rate": 4.995019704710495e-06, |
| "loss": 0.7114, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.1673758865248227, |
| "grad_norm": 2.8666274547576904, |
| "learning_rate": 4.994980270187499e-06, |
| "loss": 0.7416, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.16784869976359337, |
| "grad_norm": 3.1644718647003174, |
| "learning_rate": 4.994940680313021e-06, |
| "loss": 0.661, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.16832151300236406, |
| "grad_norm": 3.050391674041748, |
| "learning_rate": 4.994900935089527e-06, |
| "loss": 0.7243, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.16879432624113475, |
| "grad_norm": 2.985466480255127, |
| "learning_rate": 4.994861034519491e-06, |
| "loss": 0.6917, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.16926713947990543, |
| "grad_norm": 2.909342050552368, |
| "learning_rate": 4.9948209786053995e-06, |
| "loss": 0.6636, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.16973995271867612, |
| "grad_norm": 3.2214784622192383, |
| "learning_rate": 4.9947807673497435e-06, |
| "loss": 0.7903, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.1702127659574468, |
| "grad_norm": 2.5654983520507812, |
| "learning_rate": 4.994740400755029e-06, |
| "loss": 0.6129, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.1706855791962175, |
| "grad_norm": 3.775646448135376, |
| "learning_rate": 4.99469987882377e-06, |
| "loss": 0.7145, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.17115839243498818, |
| "grad_norm": 2.8965413570404053, |
| "learning_rate": 4.994659201558487e-06, |
| "loss": 0.7177, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.17163120567375886, |
| "grad_norm": 3.485597848892212, |
| "learning_rate": 4.9946183689617146e-06, |
| "loss": 0.8107, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.17210401891252955, |
| "grad_norm": 3.277839183807373, |
| "learning_rate": 4.994577381035995e-06, |
| "loss": 0.691, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.17257683215130024, |
| "grad_norm": 2.8807685375213623, |
| "learning_rate": 4.99453623778388e-06, |
| "loss": 0.7627, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.17304964539007092, |
| "grad_norm": 3.0659940242767334, |
| "learning_rate": 4.994494939207932e-06, |
| "loss": 0.6858, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.1735224586288416, |
| "grad_norm": 3.0881855487823486, |
| "learning_rate": 4.994453485310723e-06, |
| "loss": 0.8212, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.1739952718676123, |
| "grad_norm": 2.7199201583862305, |
| "learning_rate": 4.994411876094832e-06, |
| "loss": 0.6516, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.17446808510638298, |
| "grad_norm": 2.955889940261841, |
| "learning_rate": 4.994370111562851e-06, |
| "loss": 0.6579, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.17494089834515367, |
| "grad_norm": 3.1321663856506348, |
| "learning_rate": 4.994328191717382e-06, |
| "loss": 0.6891, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.17541371158392435, |
| "grad_norm": 3.0560388565063477, |
| "learning_rate": 4.994286116561034e-06, |
| "loss": 0.7243, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.17588652482269504, |
| "grad_norm": 3.1560704708099365, |
| "learning_rate": 4.994243886096425e-06, |
| "loss": 0.7262, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.17635933806146573, |
| "grad_norm": 2.913541316986084, |
| "learning_rate": 4.994201500326187e-06, |
| "loss": 0.7318, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.1768321513002364, |
| "grad_norm": 3.098376512527466, |
| "learning_rate": 4.994158959252958e-06, |
| "loss": 0.6419, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.1773049645390071, |
| "grad_norm": 2.977508544921875, |
| "learning_rate": 4.994116262879387e-06, |
| "loss": 0.6709, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.17777777777777778, |
| "grad_norm": 3.168186902999878, |
| "learning_rate": 4.994073411208133e-06, |
| "loss": 0.6608, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.17825059101654847, |
| "grad_norm": 3.436844825744629, |
| "learning_rate": 4.994030404241864e-06, |
| "loss": 0.7227, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.17872340425531916, |
| "grad_norm": 2.8998289108276367, |
| "learning_rate": 4.993987241983258e-06, |
| "loss": 0.6512, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.17919621749408984, |
| "grad_norm": 3.407191514968872, |
| "learning_rate": 4.993943924435002e-06, |
| "loss": 0.616, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.17966903073286053, |
| "grad_norm": 3.744858741760254, |
| "learning_rate": 4.993900451599793e-06, |
| "loss": 0.8599, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.18014184397163122, |
| "grad_norm": 3.486283779144287, |
| "learning_rate": 4.993856823480338e-06, |
| "loss": 0.6634, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.1806146572104019, |
| "grad_norm": 2.895719051361084, |
| "learning_rate": 4.993813040079355e-06, |
| "loss": 0.6972, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.1810874704491726, |
| "grad_norm": 2.814133882522583, |
| "learning_rate": 4.993769101399569e-06, |
| "loss": 0.6271, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.18156028368794327, |
| "grad_norm": 2.8609800338745117, |
| "learning_rate": 4.993725007443715e-06, |
| "loss": 0.6481, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.18203309692671396, |
| "grad_norm": 3.2829644680023193, |
| "learning_rate": 4.99368075821454e-06, |
| "loss": 0.7999, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.18250591016548465, |
| "grad_norm": 3.1417458057403564, |
| "learning_rate": 4.993636353714798e-06, |
| "loss": 0.6972, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.1829787234042553, |
| "grad_norm": 3.0679385662078857, |
| "learning_rate": 4.993591793947256e-06, |
| "loss": 0.667, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.183451536643026, |
| "grad_norm": 3.1387410163879395, |
| "learning_rate": 4.993547078914686e-06, |
| "loss": 0.7618, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.18392434988179668, |
| "grad_norm": 2.9181406497955322, |
| "learning_rate": 4.993502208619872e-06, |
| "loss": 0.7391, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.18439716312056736, |
| "grad_norm": 2.8952157497406006, |
| "learning_rate": 4.993457183065611e-06, |
| "loss": 0.6988, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.18486997635933805, |
| "grad_norm": 3.2274813652038574, |
| "learning_rate": 4.993412002254704e-06, |
| "loss": 0.688, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.18534278959810874, |
| "grad_norm": 3.4693779945373535, |
| "learning_rate": 4.993366666189965e-06, |
| "loss": 0.6634, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.18581560283687942, |
| "grad_norm": 3.5358526706695557, |
| "learning_rate": 4.993321174874217e-06, |
| "loss": 0.7343, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.1862884160756501, |
| "grad_norm": 3.013338088989258, |
| "learning_rate": 4.993275528310292e-06, |
| "loss": 0.7579, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.1867612293144208, |
| "grad_norm": 2.694772720336914, |
| "learning_rate": 4.993229726501033e-06, |
| "loss": 0.718, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.18723404255319148, |
| "grad_norm": 3.070612907409668, |
| "learning_rate": 4.9931837694492915e-06, |
| "loss": 0.6438, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.18770685579196217, |
| "grad_norm": 2.9193027019500732, |
| "learning_rate": 4.993137657157928e-06, |
| "loss": 0.6788, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.18817966903073285, |
| "grad_norm": 3.047682046890259, |
| "learning_rate": 4.993091389629816e-06, |
| "loss": 0.6826, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.18865248226950354, |
| "grad_norm": 2.9629905223846436, |
| "learning_rate": 4.993044966867834e-06, |
| "loss": 0.7196, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.18912529550827423, |
| "grad_norm": 3.0692050457000732, |
| "learning_rate": 4.992998388874874e-06, |
| "loss": 0.7015, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1895981087470449, |
| "grad_norm": 3.5427212715148926, |
| "learning_rate": 4.992951655653836e-06, |
| "loss": 0.8292, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.1900709219858156, |
| "grad_norm": 2.643526554107666, |
| "learning_rate": 4.992904767207629e-06, |
| "loss": 0.624, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.19054373522458629, |
| "grad_norm": 3.1185996532440186, |
| "learning_rate": 4.992857723539173e-06, |
| "loss": 0.7354, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.19101654846335697, |
| "grad_norm": 3.006856679916382, |
| "learning_rate": 4.992810524651398e-06, |
| "loss": 0.7752, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.19148936170212766, |
| "grad_norm": 2.9913275241851807, |
| "learning_rate": 4.9927631705472425e-06, |
| "loss": 0.7306, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.19196217494089834, |
| "grad_norm": 2.6794071197509766, |
| "learning_rate": 4.992715661229655e-06, |
| "loss": 0.6136, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.19243498817966903, |
| "grad_norm": 3.5933966636657715, |
| "learning_rate": 4.992667996701593e-06, |
| "loss": 0.7024, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.19290780141843972, |
| "grad_norm": 2.862187623977661, |
| "learning_rate": 4.992620176966025e-06, |
| "loss": 0.692, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.1933806146572104, |
| "grad_norm": 3.076845407485962, |
| "learning_rate": 4.9925722020259286e-06, |
| "loss": 0.7475, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.1938534278959811, |
| "grad_norm": 3.372919797897339, |
| "learning_rate": 4.9925240718842895e-06, |
| "loss": 0.6886, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.19432624113475178, |
| "grad_norm": 2.922977924346924, |
| "learning_rate": 4.992475786544108e-06, |
| "loss": 0.7049, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.19479905437352246, |
| "grad_norm": 2.908034324645996, |
| "learning_rate": 4.992427346008387e-06, |
| "loss": 0.6498, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.19527186761229315, |
| "grad_norm": 3.096723794937134, |
| "learning_rate": 4.992378750280144e-06, |
| "loss": 0.7151, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.19574468085106383, |
| "grad_norm": 2.895237684249878, |
| "learning_rate": 4.992329999362405e-06, |
| "loss": 0.7277, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.19621749408983452, |
| "grad_norm": 2.718230724334717, |
| "learning_rate": 4.9922810932582065e-06, |
| "loss": 0.6375, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.1966903073286052, |
| "grad_norm": 3.187743663787842, |
| "learning_rate": 4.992232031970592e-06, |
| "loss": 0.6528, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.1971631205673759, |
| "grad_norm": 2.996406316757202, |
| "learning_rate": 4.992182815502616e-06, |
| "loss": 0.6552, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.19763593380614658, |
| "grad_norm": 3.301084041595459, |
| "learning_rate": 4.992133443857345e-06, |
| "loss": 0.7061, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.19810874704491727, |
| "grad_norm": 3.7874677181243896, |
| "learning_rate": 4.992083917037853e-06, |
| "loss": 0.7859, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.19858156028368795, |
| "grad_norm": 3.124253511428833, |
| "learning_rate": 4.992034235047222e-06, |
| "loss": 0.7615, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.19905437352245864, |
| "grad_norm": 3.0488970279693604, |
| "learning_rate": 4.991984397888546e-06, |
| "loss": 0.6916, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.19952718676122932, |
| "grad_norm": 3.1241321563720703, |
| "learning_rate": 4.991934405564929e-06, |
| "loss": 0.7055, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 3.396632432937622, |
| "learning_rate": 4.991884258079484e-06, |
| "loss": 0.7675, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.2004728132387707, |
| "grad_norm": 3.7776873111724854, |
| "learning_rate": 4.9918339554353316e-06, |
| "loss": 0.7371, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.20094562647754138, |
| "grad_norm": 3.3356032371520996, |
| "learning_rate": 4.991783497635606e-06, |
| "loss": 0.6778, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.20141843971631207, |
| "grad_norm": 2.988856792449951, |
| "learning_rate": 4.9917328846834474e-06, |
| "loss": 0.6795, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.20189125295508276, |
| "grad_norm": 3.264183282852173, |
| "learning_rate": 4.99168211658201e-06, |
| "loss": 0.7707, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.20236406619385341, |
| "grad_norm": 3.878068208694458, |
| "learning_rate": 4.991631193334451e-06, |
| "loss": 0.857, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.2028368794326241, |
| "grad_norm": 3.6377553939819336, |
| "learning_rate": 4.991580114943943e-06, |
| "loss": 0.8033, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.2033096926713948, |
| "grad_norm": 2.95393967628479, |
| "learning_rate": 4.991528881413667e-06, |
| "loss": 0.6809, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.20378250591016547, |
| "grad_norm": 3.058704376220703, |
| "learning_rate": 4.9914774927468125e-06, |
| "loss": 0.6664, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.20425531914893616, |
| "grad_norm": 2.7783217430114746, |
| "learning_rate": 4.9914259489465795e-06, |
| "loss": 0.6478, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.20472813238770685, |
| "grad_norm": 2.4825217723846436, |
| "learning_rate": 4.991374250016177e-06, |
| "loss": 0.6598, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.20520094562647753, |
| "grad_norm": 2.8753600120544434, |
| "learning_rate": 4.991322395958824e-06, |
| "loss": 0.6947, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.20567375886524822, |
| "grad_norm": 3.2339367866516113, |
| "learning_rate": 4.99127038677775e-06, |
| "loss": 0.8201, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.2061465721040189, |
| "grad_norm": 2.9065537452697754, |
| "learning_rate": 4.991218222476193e-06, |
| "loss": 0.6679, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.2066193853427896, |
| "grad_norm": 3.283228874206543, |
| "learning_rate": 4.991165903057401e-06, |
| "loss": 0.8039, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.20709219858156028, |
| "grad_norm": 3.429872751235962, |
| "learning_rate": 4.991113428524631e-06, |
| "loss": 0.7392, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.20756501182033096, |
| "grad_norm": 3.118943452835083, |
| "learning_rate": 4.991060798881152e-06, |
| "loss": 0.6794, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.20803782505910165, |
| "grad_norm": 3.395970106124878, |
| "learning_rate": 4.99100801413024e-06, |
| "loss": 0.6862, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.20851063829787234, |
| "grad_norm": 2.869191884994507, |
| "learning_rate": 4.99095507427518e-06, |
| "loss": 0.6076, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.20898345153664302, |
| "grad_norm": 3.1934661865234375, |
| "learning_rate": 4.990901979319272e-06, |
| "loss": 0.6927, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.2094562647754137, |
| "grad_norm": 2.9068603515625, |
| "learning_rate": 4.990848729265819e-06, |
| "loss": 0.6864, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.2099290780141844, |
| "grad_norm": 3.0535948276519775, |
| "learning_rate": 4.9907953241181375e-06, |
| "loss": 0.6396, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.21040189125295508, |
| "grad_norm": 2.871511459350586, |
| "learning_rate": 4.990741763879554e-06, |
| "loss": 0.6743, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.21087470449172577, |
| "grad_norm": 2.9184393882751465, |
| "learning_rate": 4.9906880485534015e-06, |
| "loss": 0.6786, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.21134751773049645, |
| "grad_norm": 3.0628271102905273, |
| "learning_rate": 4.990634178143026e-06, |
| "loss": 0.6326, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.21182033096926714, |
| "grad_norm": 3.7878305912017822, |
| "learning_rate": 4.990580152651782e-06, |
| "loss": 0.7944, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.21229314420803783, |
| "grad_norm": 2.8577189445495605, |
| "learning_rate": 4.990525972083031e-06, |
| "loss": 0.71, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.2127659574468085, |
| "grad_norm": 3.307769775390625, |
| "learning_rate": 4.99047163644015e-06, |
| "loss": 0.6893, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2132387706855792, |
| "grad_norm": 2.7391717433929443, |
| "learning_rate": 4.990417145726519e-06, |
| "loss": 0.712, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.21371158392434988, |
| "grad_norm": 2.938044786453247, |
| "learning_rate": 4.990362499945534e-06, |
| "loss": 0.7516, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.21418439716312057, |
| "grad_norm": 2.7831056118011475, |
| "learning_rate": 4.990307699100595e-06, |
| "loss": 0.6168, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.21465721040189126, |
| "grad_norm": 2.907977342605591, |
| "learning_rate": 4.990252743195116e-06, |
| "loss": 0.6706, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.21513002364066194, |
| "grad_norm": 3.7882161140441895, |
| "learning_rate": 4.990197632232517e-06, |
| "loss": 0.6847, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.21560283687943263, |
| "grad_norm": 2.899716854095459, |
| "learning_rate": 4.990142366216232e-06, |
| "loss": 0.6699, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.21607565011820332, |
| "grad_norm": 2.907003879547119, |
| "learning_rate": 4.990086945149701e-06, |
| "loss": 0.6864, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.216548463356974, |
| "grad_norm": 3.2407333850860596, |
| "learning_rate": 4.9900313690363736e-06, |
| "loss": 0.692, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.2170212765957447, |
| "grad_norm": 2.9055583477020264, |
| "learning_rate": 4.989975637879712e-06, |
| "loss": 0.7113, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.21749408983451538, |
| "grad_norm": 2.9836206436157227, |
| "learning_rate": 4.989919751683184e-06, |
| "loss": 0.6673, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.21796690307328606, |
| "grad_norm": 3.371035575866699, |
| "learning_rate": 4.989863710450273e-06, |
| "loss": 0.7181, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.21843971631205675, |
| "grad_norm": 2.9636635780334473, |
| "learning_rate": 4.989807514184465e-06, |
| "loss": 0.6082, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.21891252955082743, |
| "grad_norm": 2.9634664058685303, |
| "learning_rate": 4.9897511628892615e-06, |
| "loss": 0.7086, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.21938534278959812, |
| "grad_norm": 3.154763698577881, |
| "learning_rate": 4.98969465656817e-06, |
| "loss": 0.7027, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.2198581560283688, |
| "grad_norm": 2.9959890842437744, |
| "learning_rate": 4.98963799522471e-06, |
| "loss": 0.6498, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.2203309692671395, |
| "grad_norm": 3.5470590591430664, |
| "learning_rate": 4.989581178862408e-06, |
| "loss": 0.7199, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.22080378250591018, |
| "grad_norm": 7.1873369216918945, |
| "learning_rate": 4.989524207484802e-06, |
| "loss": 0.6676, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.22127659574468084, |
| "grad_norm": 3.1099541187286377, |
| "learning_rate": 4.98946708109544e-06, |
| "loss": 0.6785, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.22174940898345152, |
| "grad_norm": 2.830991506576538, |
| "learning_rate": 4.9894097996978795e-06, |
| "loss": 0.6456, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 3.0212316513061523, |
| "learning_rate": 4.989352363295687e-06, |
| "loss": 0.6048, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.2226950354609929, |
| "grad_norm": 3.18776798248291, |
| "learning_rate": 4.989294771892437e-06, |
| "loss": 0.7078, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.22316784869976358, |
| "grad_norm": 2.9972598552703857, |
| "learning_rate": 4.989237025491717e-06, |
| "loss": 0.7082, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.22364066193853427, |
| "grad_norm": 3.4935688972473145, |
| "learning_rate": 4.989179124097123e-06, |
| "loss": 0.8199, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.22411347517730495, |
| "grad_norm": 2.6485543251037598, |
| "learning_rate": 4.9891210677122595e-06, |
| "loss": 0.6371, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.22458628841607564, |
| "grad_norm": 2.969233512878418, |
| "learning_rate": 4.989062856340742e-06, |
| "loss": 0.6879, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.22505910165484633, |
| "grad_norm": 2.881875514984131, |
| "learning_rate": 4.989004489986194e-06, |
| "loss": 0.7415, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.225531914893617, |
| "grad_norm": 2.624540090560913, |
| "learning_rate": 4.98894596865225e-06, |
| "loss": 0.6522, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.2260047281323877, |
| "grad_norm": 3.61075496673584, |
| "learning_rate": 4.988887292342555e-06, |
| "loss": 0.7109, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.2264775413711584, |
| "grad_norm": 2.9368972778320312, |
| "learning_rate": 4.988828461060762e-06, |
| "loss": 0.6843, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.22695035460992907, |
| "grad_norm": 3.0670197010040283, |
| "learning_rate": 4.988769474810533e-06, |
| "loss": 0.6807, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.22742316784869976, |
| "grad_norm": 2.9662792682647705, |
| "learning_rate": 4.988710333595542e-06, |
| "loss": 0.6796, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.22789598108747045, |
| "grad_norm": 2.971235752105713, |
| "learning_rate": 4.988651037419472e-06, |
| "loss": 0.696, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.22836879432624113, |
| "grad_norm": 2.931884527206421, |
| "learning_rate": 4.988591586286013e-06, |
| "loss": 0.7323, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.22884160756501182, |
| "grad_norm": 2.8114213943481445, |
| "learning_rate": 4.988531980198868e-06, |
| "loss": 0.6584, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.2293144208037825, |
| "grad_norm": 3.2785916328430176, |
| "learning_rate": 4.98847221916175e-06, |
| "loss": 0.7514, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.2297872340425532, |
| "grad_norm": 3.0520215034484863, |
| "learning_rate": 4.988412303178377e-06, |
| "loss": 0.7564, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.23026004728132388, |
| "grad_norm": 3.181002616882324, |
| "learning_rate": 4.988352232252483e-06, |
| "loss": 0.6768, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.23073286052009456, |
| "grad_norm": 3.4953625202178955, |
| "learning_rate": 4.988292006387805e-06, |
| "loss": 0.7143, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.23120567375886525, |
| "grad_norm": 3.326571226119995, |
| "learning_rate": 4.988231625588096e-06, |
| "loss": 0.7318, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.23167848699763594, |
| "grad_norm": 3.09614634513855, |
| "learning_rate": 4.988171089857113e-06, |
| "loss": 0.6574, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.23215130023640662, |
| "grad_norm": 2.7439446449279785, |
| "learning_rate": 4.9881103991986265e-06, |
| "loss": 0.6637, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.2326241134751773, |
| "grad_norm": 3.0681190490722656, |
| "learning_rate": 4.988049553616416e-06, |
| "loss": 0.6326, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.233096926713948, |
| "grad_norm": 3.0757341384887695, |
| "learning_rate": 4.98798855311427e-06, |
| "loss": 0.695, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.23356973995271868, |
| "grad_norm": 2.8637635707855225, |
| "learning_rate": 4.987927397695985e-06, |
| "loss": 0.6598, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.23404255319148937, |
| "grad_norm": 3.3641068935394287, |
| "learning_rate": 4.9878660873653715e-06, |
| "loss": 0.7435, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.23451536643026005, |
| "grad_norm": 3.5025596618652344, |
| "learning_rate": 4.987804622126245e-06, |
| "loss": 0.735, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.23498817966903074, |
| "grad_norm": 2.9298837184906006, |
| "learning_rate": 4.987743001982434e-06, |
| "loss": 0.7063, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.23546099290780143, |
| "grad_norm": 2.70358943939209, |
| "learning_rate": 4.987681226937774e-06, |
| "loss": 0.6799, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.2359338061465721, |
| "grad_norm": 3.027871608734131, |
| "learning_rate": 4.9876192969961125e-06, |
| "loss": 0.6881, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.2364066193853428, |
| "grad_norm": 3.362306594848633, |
| "learning_rate": 4.987557212161304e-06, |
| "loss": 0.7906, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.23687943262411348, |
| "grad_norm": 3.3136050701141357, |
| "learning_rate": 4.987494972437217e-06, |
| "loss": 0.6878, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.23735224586288417, |
| "grad_norm": 3.017089605331421, |
| "learning_rate": 4.9874325778277255e-06, |
| "loss": 0.7279, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.23782505910165486, |
| "grad_norm": 2.8300516605377197, |
| "learning_rate": 4.987370028336714e-06, |
| "loss": 0.6864, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.23829787234042554, |
| "grad_norm": 3.201860189437866, |
| "learning_rate": 4.987307323968077e-06, |
| "loss": 0.7531, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.23877068557919623, |
| "grad_norm": 2.685396194458008, |
| "learning_rate": 4.987244464725721e-06, |
| "loss": 0.5849, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.23924349881796692, |
| "grad_norm": 2.8715312480926514, |
| "learning_rate": 4.987181450613557e-06, |
| "loss": 0.675, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.2397163120567376, |
| "grad_norm": 2.813908815383911, |
| "learning_rate": 4.987118281635511e-06, |
| "loss": 0.6841, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.2401891252955083, |
| "grad_norm": 3.2738473415374756, |
| "learning_rate": 4.987054957795514e-06, |
| "loss": 0.7158, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.24066193853427895, |
| "grad_norm": 2.896134376525879, |
| "learning_rate": 4.986991479097511e-06, |
| "loss": 0.7542, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.24113475177304963, |
| "grad_norm": 3.0390403270721436, |
| "learning_rate": 4.986927845545454e-06, |
| "loss": 0.6733, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.24160756501182032, |
| "grad_norm": 3.0300254821777344, |
| "learning_rate": 4.9868640571433044e-06, |
| "loss": 0.722, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.242080378250591, |
| "grad_norm": 3.3037352561950684, |
| "learning_rate": 4.986800113895035e-06, |
| "loss": 0.6811, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.2425531914893617, |
| "grad_norm": 3.0358474254608154, |
| "learning_rate": 4.986736015804627e-06, |
| "loss": 0.7348, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.24302600472813238, |
| "grad_norm": 3.108792304992676, |
| "learning_rate": 4.986671762876071e-06, |
| "loss": 0.6096, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.24349881796690306, |
| "grad_norm": 3.1316237449645996, |
| "learning_rate": 4.986607355113367e-06, |
| "loss": 0.6357, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.24397163120567375, |
| "grad_norm": 3.3095219135284424, |
| "learning_rate": 4.986542792520528e-06, |
| "loss": 0.7515, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.24444444444444444, |
| "grad_norm": 3.4775984287261963, |
| "learning_rate": 4.986478075101572e-06, |
| "loss": 0.7104, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.24491725768321512, |
| "grad_norm": 3.341708183288574, |
| "learning_rate": 4.986413202860528e-06, |
| "loss": 0.7339, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.2453900709219858, |
| "grad_norm": 2.9646966457366943, |
| "learning_rate": 4.986348175801438e-06, |
| "loss": 0.6032, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.2458628841607565, |
| "grad_norm": 3.1853902339935303, |
| "learning_rate": 4.986282993928349e-06, |
| "loss": 0.6925, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.24633569739952718, |
| "grad_norm": 3.286909818649292, |
| "learning_rate": 4.98621765724532e-06, |
| "loss": 0.7447, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.24680851063829787, |
| "grad_norm": 3.2255051136016846, |
| "learning_rate": 4.986152165756419e-06, |
| "loss": 0.7747, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.24728132387706855, |
| "grad_norm": 3.002352237701416, |
| "learning_rate": 4.986086519465724e-06, |
| "loss": 0.6472, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.24775413711583924, |
| "grad_norm": 3.4738974571228027, |
| "learning_rate": 4.986020718377322e-06, |
| "loss": 0.7381, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.24822695035460993, |
| "grad_norm": 3.4470200538635254, |
| "learning_rate": 4.985954762495312e-06, |
| "loss": 0.6878, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.2486997635933806, |
| "grad_norm": 2.9219350814819336, |
| "learning_rate": 4.985888651823799e-06, |
| "loss": 0.6317, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.2491725768321513, |
| "grad_norm": 3.061767101287842, |
| "learning_rate": 4.985822386366899e-06, |
| "loss": 0.6842, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.24964539007092199, |
| "grad_norm": 3.0291247367858887, |
| "learning_rate": 4.985755966128742e-06, |
| "loss": 0.6852, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.25011820330969264, |
| "grad_norm": 2.964280843734741, |
| "learning_rate": 4.985689391113457e-06, |
| "loss": 0.7738, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.25059101654846333, |
| "grad_norm": 3.058302164077759, |
| "learning_rate": 4.9856226613251955e-06, |
| "loss": 0.6677, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.251063829787234, |
| "grad_norm": 3.345141649246216, |
| "learning_rate": 4.985555776768109e-06, |
| "loss": 0.7837, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.2515366430260047, |
| "grad_norm": 3.565031051635742, |
| "learning_rate": 4.9854887374463636e-06, |
| "loss": 0.7231, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.2520094562647754, |
| "grad_norm": 2.7953789234161377, |
| "learning_rate": 4.985421543364132e-06, |
| "loss": 0.6102, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.2524822695035461, |
| "grad_norm": 2.887606620788574, |
| "learning_rate": 4.9853541945256e-06, |
| "loss": 0.6289, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.25295508274231676, |
| "grad_norm": 3.1480495929718018, |
| "learning_rate": 4.985286690934961e-06, |
| "loss": 0.6348, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.25342789598108745, |
| "grad_norm": 2.8912761211395264, |
| "learning_rate": 4.985219032596416e-06, |
| "loss": 0.595, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.25390070921985813, |
| "grad_norm": 2.947936534881592, |
| "learning_rate": 4.98515121951418e-06, |
| "loss": 0.6196, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.2543735224586288, |
| "grad_norm": 3.1085827350616455, |
| "learning_rate": 4.985083251692474e-06, |
| "loss": 0.6387, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.2548463356973995, |
| "grad_norm": 3.1688334941864014, |
| "learning_rate": 4.985015129135531e-06, |
| "loss": 0.7055, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.2553191489361702, |
| "grad_norm": 3.075042963027954, |
| "learning_rate": 4.984946851847593e-06, |
| "loss": 0.7515, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.2557919621749409, |
| "grad_norm": 3.1933093070983887, |
| "learning_rate": 4.98487841983291e-06, |
| "loss": 0.7054, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.25626477541371157, |
| "grad_norm": 3.043473958969116, |
| "learning_rate": 4.984809833095744e-06, |
| "loss": 0.6281, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.25673758865248225, |
| "grad_norm": 3.0532584190368652, |
| "learning_rate": 4.9847410916403645e-06, |
| "loss": 0.6155, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.25721040189125294, |
| "grad_norm": 3.608480215072632, |
| "learning_rate": 4.984672195471053e-06, |
| "loss": 0.7363, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.2576832151300236, |
| "grad_norm": 2.7491862773895264, |
| "learning_rate": 4.9846031445921e-06, |
| "loss": 0.6594, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.2581560283687943, |
| "grad_norm": 2.8602418899536133, |
| "learning_rate": 4.984533939007802e-06, |
| "loss": 0.6742, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.258628841607565, |
| "grad_norm": 3.1782007217407227, |
| "learning_rate": 4.98446457872247e-06, |
| "loss": 0.731, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.2591016548463357, |
| "grad_norm": 2.796147584915161, |
| "learning_rate": 4.984395063740423e-06, |
| "loss": 0.6617, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.25957446808510637, |
| "grad_norm": 2.8392202854156494, |
| "learning_rate": 4.984325394065991e-06, |
| "loss": 0.6753, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.26004728132387706, |
| "grad_norm": 3.134672164916992, |
| "learning_rate": 4.984255569703508e-06, |
| "loss": 0.7222, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.26052009456264774, |
| "grad_norm": 2.734330177307129, |
| "learning_rate": 4.984185590657325e-06, |
| "loss": 0.6098, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.26099290780141843, |
| "grad_norm": 3.739010810852051, |
| "learning_rate": 4.984115456931798e-06, |
| "loss": 0.7457, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.2614657210401891, |
| "grad_norm": 2.8412528038024902, |
| "learning_rate": 4.9840451685312925e-06, |
| "loss": 0.6972, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.2619385342789598, |
| "grad_norm": 3.017395496368408, |
| "learning_rate": 4.983974725460188e-06, |
| "loss": 0.6887, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.2624113475177305, |
| "grad_norm": 3.2746949195861816, |
| "learning_rate": 4.98390412772287e-06, |
| "loss": 0.7047, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.2628841607565012, |
| "grad_norm": 3.1561965942382812, |
| "learning_rate": 4.983833375323732e-06, |
| "loss": 0.7726, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.26335697399527186, |
| "grad_norm": 3.2367217540740967, |
| "learning_rate": 4.9837624682671816e-06, |
| "loss": 0.6348, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.26382978723404255, |
| "grad_norm": 2.8195858001708984, |
| "learning_rate": 4.983691406557633e-06, |
| "loss": 0.6387, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.26430260047281323, |
| "grad_norm": 3.349820852279663, |
| "learning_rate": 4.983620190199511e-06, |
| "loss": 0.6776, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.2647754137115839, |
| "grad_norm": 2.8025588989257812, |
| "learning_rate": 4.98354881919725e-06, |
| "loss": 0.6512, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.2652482269503546, |
| "grad_norm": 2.9125499725341797, |
| "learning_rate": 4.983477293555295e-06, |
| "loss": 0.7024, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.2657210401891253, |
| "grad_norm": 3.3479275703430176, |
| "learning_rate": 4.983405613278098e-06, |
| "loss": 0.688, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.266193853427896, |
| "grad_norm": 3.123971462249756, |
| "learning_rate": 4.983333778370123e-06, |
| "loss": 0.6743, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "grad_norm": 2.891625165939331, |
| "learning_rate": 4.983261788835843e-06, |
| "loss": 0.5971, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.26713947990543735, |
| "grad_norm": 3.5066864490509033, |
| "learning_rate": 4.98318964467974e-06, |
| "loss": 0.6958, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.26761229314420804, |
| "grad_norm": 2.570547342300415, |
| "learning_rate": 4.983117345906306e-06, |
| "loss": 0.609, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.2680851063829787, |
| "grad_norm": 3.005106210708618, |
| "learning_rate": 4.983044892520044e-06, |
| "loss": 0.6791, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.2685579196217494, |
| "grad_norm": 3.429675340652466, |
| "learning_rate": 4.982972284525463e-06, |
| "loss": 0.6625, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.2690307328605201, |
| "grad_norm": 3.825657367706299, |
| "learning_rate": 4.982899521927086e-06, |
| "loss": 0.6368, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.2695035460992908, |
| "grad_norm": 2.8699095249176025, |
| "learning_rate": 4.982826604729443e-06, |
| "loss": 0.6425, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.26997635933806147, |
| "grad_norm": 3.1688714027404785, |
| "learning_rate": 4.982753532937074e-06, |
| "loss": 0.6904, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.27044917257683215, |
| "grad_norm": 3.3889992237091064, |
| "learning_rate": 4.98268030655453e-06, |
| "loss": 0.7575, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.27092198581560284, |
| "grad_norm": 3.108315944671631, |
| "learning_rate": 4.982606925586367e-06, |
| "loss": 0.6648, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.2713947990543735, |
| "grad_norm": 3.209831953048706, |
| "learning_rate": 4.982533390037159e-06, |
| "loss": 0.657, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.2718676122931442, |
| "grad_norm": 3.1740927696228027, |
| "learning_rate": 4.982459699911482e-06, |
| "loss": 0.7262, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.2723404255319149, |
| "grad_norm": 3.0190417766571045, |
| "learning_rate": 4.982385855213924e-06, |
| "loss": 0.6368, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.2728132387706856, |
| "grad_norm": 3.05049467086792, |
| "learning_rate": 4.982311855949084e-06, |
| "loss": 0.72, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.27328605200945627, |
| "grad_norm": 2.984816551208496, |
| "learning_rate": 4.98223770212157e-06, |
| "loss": 0.6856, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.27375886524822696, |
| "grad_norm": 2.744969606399536, |
| "learning_rate": 4.982163393735998e-06, |
| "loss": 0.6023, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.27423167848699764, |
| "grad_norm": 3.170564889907837, |
| "learning_rate": 4.982088930796996e-06, |
| "loss": 0.6678, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.27470449172576833, |
| "grad_norm": 2.8686118125915527, |
| "learning_rate": 4.982014313309199e-06, |
| "loss": 0.6157, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.275177304964539, |
| "grad_norm": 2.8768694400787354, |
| "learning_rate": 4.981939541277254e-06, |
| "loss": 0.6566, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.2756501182033097, |
| "grad_norm": 2.621481418609619, |
| "learning_rate": 4.981864614705818e-06, |
| "loss": 0.7372, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.2761229314420804, |
| "grad_norm": 3.527374267578125, |
| "learning_rate": 4.981789533599554e-06, |
| "loss": 0.6485, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.2765957446808511, |
| "grad_norm": 3.3141074180603027, |
| "learning_rate": 4.981714297963138e-06, |
| "loss": 0.6816, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.27706855791962176, |
| "grad_norm": 2.9247069358825684, |
| "learning_rate": 4.981638907801255e-06, |
| "loss": 0.7217, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.27754137115839245, |
| "grad_norm": 2.875236749649048, |
| "learning_rate": 4.981563363118599e-06, |
| "loss": 0.6662, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.27801418439716313, |
| "grad_norm": 2.9540364742279053, |
| "learning_rate": 4.981487663919874e-06, |
| "loss": 0.7225, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.2784869976359338, |
| "grad_norm": 2.90889310836792, |
| "learning_rate": 4.981411810209793e-06, |
| "loss": 0.6054, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.2789598108747045, |
| "grad_norm": 2.8541409969329834, |
| "learning_rate": 4.981335801993078e-06, |
| "loss": 0.6539, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.2794326241134752, |
| "grad_norm": 3.1600730419158936, |
| "learning_rate": 4.981259639274465e-06, |
| "loss": 0.6415, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.2799054373522459, |
| "grad_norm": 3.569376230239868, |
| "learning_rate": 4.981183322058693e-06, |
| "loss": 0.6944, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.28037825059101656, |
| "grad_norm": 3.067667007446289, |
| "learning_rate": 4.981106850350515e-06, |
| "loss": 0.7378, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.28085106382978725, |
| "grad_norm": 3.082073450088501, |
| "learning_rate": 4.981030224154693e-06, |
| "loss": 0.693, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.28132387706855794, |
| "grad_norm": 2.902932643890381, |
| "learning_rate": 4.980953443475998e-06, |
| "loss": 0.6549, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.2817966903073286, |
| "grad_norm": 2.6821181774139404, |
| "learning_rate": 4.980876508319211e-06, |
| "loss": 0.6231, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.2822695035460993, |
| "grad_norm": 3.1747355461120605, |
| "learning_rate": 4.9807994186891215e-06, |
| "loss": 0.6826, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.28274231678487, |
| "grad_norm": 2.6975860595703125, |
| "learning_rate": 4.980722174590531e-06, |
| "loss": 0.6669, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.2832151300236407, |
| "grad_norm": 2.924285650253296, |
| "learning_rate": 4.9806447760282486e-06, |
| "loss": 0.689, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.28368794326241137, |
| "grad_norm": 2.941417694091797, |
| "learning_rate": 4.980567223007093e-06, |
| "loss": 0.6672, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.28416075650118205, |
| "grad_norm": 2.8582186698913574, |
| "learning_rate": 4.980489515531892e-06, |
| "loss": 0.6229, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.28463356973995274, |
| "grad_norm": 2.6462013721466064, |
| "learning_rate": 4.9804116536074865e-06, |
| "loss": 0.606, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.2851063829787234, |
| "grad_norm": 2.9029998779296875, |
| "learning_rate": 4.980333637238723e-06, |
| "loss": 0.5915, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.2855791962174941, |
| "grad_norm": 3.9359042644500732, |
| "learning_rate": 4.980255466430462e-06, |
| "loss": 0.7035, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.2860520094562648, |
| "grad_norm": 3.200524091720581, |
| "learning_rate": 4.980177141187566e-06, |
| "loss": 0.7156, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.2865248226950355, |
| "grad_norm": 3.1708686351776123, |
| "learning_rate": 4.980098661514916e-06, |
| "loss": 0.746, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.28699763593380617, |
| "grad_norm": 2.8926830291748047, |
| "learning_rate": 4.980020027417397e-06, |
| "loss": 0.6282, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.28747044917257686, |
| "grad_norm": 3.0526294708251953, |
| "learning_rate": 4.979941238899906e-06, |
| "loss": 0.6594, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.28794326241134754, |
| "grad_norm": 2.9869306087493896, |
| "learning_rate": 4.9798622959673486e-06, |
| "loss": 0.7771, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.28841607565011823, |
| "grad_norm": 2.7894513607025146, |
| "learning_rate": 4.979783198624638e-06, |
| "loss": 0.6819, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.28888888888888886, |
| "grad_norm": 2.958575963973999, |
| "learning_rate": 4.9797039468767025e-06, |
| "loss": 0.6474, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.28936170212765955, |
| "grad_norm": 3.423748016357422, |
| "learning_rate": 4.979624540728475e-06, |
| "loss": 0.7389, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.28983451536643023, |
| "grad_norm": 2.9641635417938232, |
| "learning_rate": 4.9795449801849e-06, |
| "loss": 0.6005, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.2903073286052009, |
| "grad_norm": 3.02274227142334, |
| "learning_rate": 4.979465265250933e-06, |
| "loss": 0.6358, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.2907801418439716, |
| "grad_norm": 3.0562758445739746, |
| "learning_rate": 4.979385395931534e-06, |
| "loss": 0.6313, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.2912529550827423, |
| "grad_norm": 3.301816701889038, |
| "learning_rate": 4.97930537223168e-06, |
| "loss": 0.7264, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.291725768321513, |
| "grad_norm": 2.975360870361328, |
| "learning_rate": 4.979225194156351e-06, |
| "loss": 0.613, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.29219858156028367, |
| "grad_norm": 2.9245030879974365, |
| "learning_rate": 4.97914486171054e-06, |
| "loss": 0.6646, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.29267139479905435, |
| "grad_norm": 3.1336188316345215, |
| "learning_rate": 4.979064374899249e-06, |
| "loss": 0.6421, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.29314420803782504, |
| "grad_norm": 3.6298763751983643, |
| "learning_rate": 4.978983733727491e-06, |
| "loss": 0.6433, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2936170212765957, |
| "grad_norm": 2.919597625732422, |
| "learning_rate": 4.9789029382002845e-06, |
| "loss": 0.6288, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.2940898345153664, |
| "grad_norm": 3.2206127643585205, |
| "learning_rate": 4.978821988322662e-06, |
| "loss": 0.7102, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.2945626477541371, |
| "grad_norm": 3.1767101287841797, |
| "learning_rate": 4.978740884099664e-06, |
| "loss": 0.6722, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.2950354609929078, |
| "grad_norm": 3.3425452709198, |
| "learning_rate": 4.97865962553634e-06, |
| "loss": 0.6492, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.29550827423167847, |
| "grad_norm": 3.0408358573913574, |
| "learning_rate": 4.97857821263775e-06, |
| "loss": 0.6522, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.29598108747044916, |
| "grad_norm": 2.8144783973693848, |
| "learning_rate": 4.978496645408963e-06, |
| "loss": 0.7237, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.29645390070921984, |
| "grad_norm": 3.7010560035705566, |
| "learning_rate": 4.978414923855057e-06, |
| "loss": 0.7509, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.29692671394799053, |
| "grad_norm": 2.9438371658325195, |
| "learning_rate": 4.978333047981122e-06, |
| "loss": 0.6244, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.2973995271867612, |
| "grad_norm": 3.285982370376587, |
| "learning_rate": 4.978251017792255e-06, |
| "loss": 0.7553, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.2978723404255319, |
| "grad_norm": 3.7021138668060303, |
| "learning_rate": 4.978168833293564e-06, |
| "loss": 0.7859, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.2983451536643026, |
| "grad_norm": 3.481858730316162, |
| "learning_rate": 4.9780864944901654e-06, |
| "loss": 0.7146, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.2988179669030733, |
| "grad_norm": 3.693824529647827, |
| "learning_rate": 4.978004001387188e-06, |
| "loss": 0.6608, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.29929078014184396, |
| "grad_norm": 3.0069146156311035, |
| "learning_rate": 4.9779213539897665e-06, |
| "loss": 0.6506, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.29976359338061465, |
| "grad_norm": 3.037644147872925, |
| "learning_rate": 4.977838552303048e-06, |
| "loss": 0.6487, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.30023640661938533, |
| "grad_norm": 3.018554449081421, |
| "learning_rate": 4.977755596332188e-06, |
| "loss": 0.6128, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.300709219858156, |
| "grad_norm": 3.000312089920044, |
| "learning_rate": 4.977672486082351e-06, |
| "loss": 0.6431, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.3011820330969267, |
| "grad_norm": 2.836803913116455, |
| "learning_rate": 4.977589221558713e-06, |
| "loss": 0.5914, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.3016548463356974, |
| "grad_norm": 3.080469846725464, |
| "learning_rate": 4.977505802766457e-06, |
| "loss": 0.7265, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.3021276595744681, |
| "grad_norm": 3.2245471477508545, |
| "learning_rate": 4.97742222971078e-06, |
| "loss": 0.6895, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.30260047281323876, |
| "grad_norm": 3.559006452560425, |
| "learning_rate": 4.977338502396882e-06, |
| "loss": 0.7439, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.30307328605200945, |
| "grad_norm": 2.9116289615631104, |
| "learning_rate": 4.9772546208299795e-06, |
| "loss": 0.6907, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.30354609929078014, |
| "grad_norm": 3.3645524978637695, |
| "learning_rate": 4.977170585015295e-06, |
| "loss": 0.6983, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.3040189125295508, |
| "grad_norm": 3.080148458480835, |
| "learning_rate": 4.977086394958058e-06, |
| "loss": 0.7016, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.3044917257683215, |
| "grad_norm": 2.9276750087738037, |
| "learning_rate": 4.977002050663515e-06, |
| "loss": 0.6509, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.3049645390070922, |
| "grad_norm": 3.183609962463379, |
| "learning_rate": 4.976917552136914e-06, |
| "loss": 0.6814, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.3054373522458629, |
| "grad_norm": 3.0980000495910645, |
| "learning_rate": 4.976832899383519e-06, |
| "loss": 0.6319, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.30591016548463357, |
| "grad_norm": 3.211376190185547, |
| "learning_rate": 4.9767480924086e-06, |
| "loss": 0.6365, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.30638297872340425, |
| "grad_norm": 3.214430093765259, |
| "learning_rate": 4.976663131217437e-06, |
| "loss": 0.6006, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.30685579196217494, |
| "grad_norm": 3.0914318561553955, |
| "learning_rate": 4.976578015815321e-06, |
| "loss": 0.7162, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.3073286052009456, |
| "grad_norm": 2.7644500732421875, |
| "learning_rate": 4.976492746207551e-06, |
| "loss": 0.6045, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.3078014184397163, |
| "grad_norm": 3.1913280487060547, |
| "learning_rate": 4.9764073223994374e-06, |
| "loss": 0.6796, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.308274231678487, |
| "grad_norm": 2.8919692039489746, |
| "learning_rate": 4.976321744396299e-06, |
| "loss": 0.6683, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.3087470449172577, |
| "grad_norm": 2.862234115600586, |
| "learning_rate": 4.976236012203463e-06, |
| "loss": 0.6631, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.30921985815602837, |
| "grad_norm": 2.9708092212677, |
| "learning_rate": 4.976150125826268e-06, |
| "loss": 0.6326, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.30969267139479906, |
| "grad_norm": 2.892465353012085, |
| "learning_rate": 4.976064085270063e-06, |
| "loss": 0.6574, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.31016548463356974, |
| "grad_norm": 3.9215126037597656, |
| "learning_rate": 4.975977890540205e-06, |
| "loss": 0.7351, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.31063829787234043, |
| "grad_norm": 2.9544081687927246, |
| "learning_rate": 4.975891541642059e-06, |
| "loss": 0.7264, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.3111111111111111, |
| "grad_norm": 2.995035409927368, |
| "learning_rate": 4.975805038581005e-06, |
| "loss": 0.7405, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.3115839243498818, |
| "grad_norm": 2.9653120040893555, |
| "learning_rate": 4.975718381362427e-06, |
| "loss": 0.679, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.3120567375886525, |
| "grad_norm": 2.93976092338562, |
| "learning_rate": 4.9756315699917205e-06, |
| "loss": 0.627, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.3125295508274232, |
| "grad_norm": 3.106522560119629, |
| "learning_rate": 4.9755446044742915e-06, |
| "loss": 0.6329, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.31300236406619386, |
| "grad_norm": 3.0238280296325684, |
| "learning_rate": 4.975457484815554e-06, |
| "loss": 0.6643, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.31347517730496455, |
| "grad_norm": 2.943528175354004, |
| "learning_rate": 4.9753702110209356e-06, |
| "loss": 0.668, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.31394799054373523, |
| "grad_norm": 2.6840121746063232, |
| "learning_rate": 4.9752827830958676e-06, |
| "loss": 0.5482, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.3144208037825059, |
| "grad_norm": 2.823875904083252, |
| "learning_rate": 4.975195201045794e-06, |
| "loss": 0.7017, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.3148936170212766, |
| "grad_norm": 3.148181200027466, |
| "learning_rate": 4.975107464876168e-06, |
| "loss": 0.747, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.3153664302600473, |
| "grad_norm": 2.630584478378296, |
| "learning_rate": 4.9750195745924545e-06, |
| "loss": 0.5987, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.315839243498818, |
| "grad_norm": 3.075866460800171, |
| "learning_rate": 4.974931530200124e-06, |
| "loss": 0.664, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.31631205673758866, |
| "grad_norm": 2.947197914123535, |
| "learning_rate": 4.974843331704659e-06, |
| "loss": 0.631, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.31678486997635935, |
| "grad_norm": 3.519646644592285, |
| "learning_rate": 4.974754979111552e-06, |
| "loss": 0.7154, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.31725768321513004, |
| "grad_norm": 2.8687186241149902, |
| "learning_rate": 4.974666472426305e-06, |
| "loss": 0.6366, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.3177304964539007, |
| "grad_norm": 2.6966612339019775, |
| "learning_rate": 4.974577811654426e-06, |
| "loss": 0.7112, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.3182033096926714, |
| "grad_norm": 3.1390228271484375, |
| "learning_rate": 4.974488996801439e-06, |
| "loss": 0.6882, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.3186761229314421, |
| "grad_norm": 3.4667599201202393, |
| "learning_rate": 4.974400027872871e-06, |
| "loss": 0.7153, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.3191489361702128, |
| "grad_norm": 2.9632184505462646, |
| "learning_rate": 4.974310904874265e-06, |
| "loss": 0.7081, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.31962174940898347, |
| "grad_norm": 3.46150279045105, |
| "learning_rate": 4.9742216278111666e-06, |
| "loss": 0.6242, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.32009456264775416, |
| "grad_norm": 3.380403757095337, |
| "learning_rate": 4.974132196689137e-06, |
| "loss": 0.6863, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.32056737588652484, |
| "grad_norm": 3.4279606342315674, |
| "learning_rate": 4.974042611513746e-06, |
| "loss": 0.6388, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.3210401891252955, |
| "grad_norm": 2.634523391723633, |
| "learning_rate": 4.973952872290568e-06, |
| "loss": 0.6038, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.3215130023640662, |
| "grad_norm": 3.19693922996521, |
| "learning_rate": 4.973862979025194e-06, |
| "loss": 0.6383, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.3219858156028369, |
| "grad_norm": 3.437692165374756, |
| "learning_rate": 4.973772931723218e-06, |
| "loss": 0.7288, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.3224586288416076, |
| "grad_norm": 2.506301164627075, |
| "learning_rate": 4.97368273039025e-06, |
| "loss": 0.5707, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.3229314420803783, |
| "grad_norm": 3.0942845344543457, |
| "learning_rate": 4.9735923750319044e-06, |
| "loss": 0.6348, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.32340425531914896, |
| "grad_norm": 3.0889835357666016, |
| "learning_rate": 4.973501865653809e-06, |
| "loss": 0.6697, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.32387706855791965, |
| "grad_norm": 3.0391931533813477, |
| "learning_rate": 4.973411202261598e-06, |
| "loss": 0.7091, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.32434988179669033, |
| "grad_norm": 3.0333497524261475, |
| "learning_rate": 4.973320384860917e-06, |
| "loss": 0.6403, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.324822695035461, |
| "grad_norm": 2.9714622497558594, |
| "learning_rate": 4.973229413457421e-06, |
| "loss": 0.6977, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.3252955082742317, |
| "grad_norm": 3.057558298110962, |
| "learning_rate": 4.973138288056774e-06, |
| "loss": 0.7236, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.3257683215130024, |
| "grad_norm": 2.921093463897705, |
| "learning_rate": 4.97304700866465e-06, |
| "loss": 0.576, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.3262411347517731, |
| "grad_norm": 3.0287256240844727, |
| "learning_rate": 4.972955575286732e-06, |
| "loss": 0.7077, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.32671394799054376, |
| "grad_norm": 2.8621346950531006, |
| "learning_rate": 4.972863987928716e-06, |
| "loss": 0.6952, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.3271867612293144, |
| "grad_norm": 2.631359100341797, |
| "learning_rate": 4.9727722465963006e-06, |
| "loss": 0.6931, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.3276595744680851, |
| "grad_norm": 2.8484320640563965, |
| "learning_rate": 4.972680351295201e-06, |
| "loss": 0.6292, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.32813238770685577, |
| "grad_norm": 2.593001365661621, |
| "learning_rate": 4.972588302031138e-06, |
| "loss": 0.5942, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.32860520094562645, |
| "grad_norm": 2.6321065425872803, |
| "learning_rate": 4.972496098809844e-06, |
| "loss": 0.65, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.32907801418439714, |
| "grad_norm": 3.2516732215881348, |
| "learning_rate": 4.972403741637059e-06, |
| "loss": 0.7385, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.3295508274231678, |
| "grad_norm": 3.180854320526123, |
| "learning_rate": 4.972311230518535e-06, |
| "loss": 0.6569, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.3300236406619385, |
| "grad_norm": 4.161016941070557, |
| "learning_rate": 4.972218565460031e-06, |
| "loss": 0.6416, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.3304964539007092, |
| "grad_norm": 3.153897762298584, |
| "learning_rate": 4.972125746467317e-06, |
| "loss": 0.7196, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.3309692671394799, |
| "grad_norm": 2.9595556259155273, |
| "learning_rate": 4.972032773546173e-06, |
| "loss": 0.7093, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.33144208037825057, |
| "grad_norm": 3.1086833477020264, |
| "learning_rate": 4.9719396467023875e-06, |
| "loss": 0.6963, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.33191489361702126, |
| "grad_norm": 2.958921432495117, |
| "learning_rate": 4.971846365941759e-06, |
| "loss": 0.6518, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.33238770685579194, |
| "grad_norm": 2.8745479583740234, |
| "learning_rate": 4.971752931270096e-06, |
| "loss": 0.696, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.33286052009456263, |
| "grad_norm": 3.224358558654785, |
| "learning_rate": 4.971659342693217e-06, |
| "loss": 0.6769, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 2.696319580078125, |
| "learning_rate": 4.9715656002169486e-06, |
| "loss": 0.6833, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.333806146572104, |
| "grad_norm": 2.9283502101898193, |
| "learning_rate": 4.971471703847127e-06, |
| "loss": 0.6784, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.3342789598108747, |
| "grad_norm": 2.654914140701294, |
| "learning_rate": 4.9713776535896e-06, |
| "loss": 0.6337, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.3347517730496454, |
| "grad_norm": 3.041555643081665, |
| "learning_rate": 4.971283449450224e-06, |
| "loss": 0.6227, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.33522458628841606, |
| "grad_norm": 2.893008232116699, |
| "learning_rate": 4.971189091434863e-06, |
| "loss": 0.655, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.33569739952718675, |
| "grad_norm": 2.8806653022766113, |
| "learning_rate": 4.971094579549393e-06, |
| "loss": 0.7077, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.33617021276595743, |
| "grad_norm": 3.4830048084259033, |
| "learning_rate": 4.9709999137996986e-06, |
| "loss": 0.7461, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.3366430260047281, |
| "grad_norm": 3.155444860458374, |
| "learning_rate": 4.970905094191674e-06, |
| "loss": 0.652, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.3371158392434988, |
| "grad_norm": 2.7608706951141357, |
| "learning_rate": 4.970810120731225e-06, |
| "loss": 0.684, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.3375886524822695, |
| "grad_norm": 2.8209474086761475, |
| "learning_rate": 4.970714993424265e-06, |
| "loss": 0.6009, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.3380614657210402, |
| "grad_norm": 3.6532654762268066, |
| "learning_rate": 4.9706197122767145e-06, |
| "loss": 0.702, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.33853427895981086, |
| "grad_norm": 2.6276566982269287, |
| "learning_rate": 4.970524277294508e-06, |
| "loss": 0.6338, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.33900709219858155, |
| "grad_norm": 3.509871482849121, |
| "learning_rate": 4.970428688483589e-06, |
| "loss": 0.6853, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.33947990543735224, |
| "grad_norm": 5.332682132720947, |
| "learning_rate": 4.970332945849906e-06, |
| "loss": 0.6684, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.3399527186761229, |
| "grad_norm": 2.718801975250244, |
| "learning_rate": 4.970237049399424e-06, |
| "loss": 0.6676, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.3404255319148936, |
| "grad_norm": 3.891003131866455, |
| "learning_rate": 4.970140999138112e-06, |
| "loss": 0.7043, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.3408983451536643, |
| "grad_norm": 2.8863155841827393, |
| "learning_rate": 4.970044795071951e-06, |
| "loss": 0.6563, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.341371158392435, |
| "grad_norm": 3.2527518272399902, |
| "learning_rate": 4.969948437206932e-06, |
| "loss": 0.7244, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.34184397163120567, |
| "grad_norm": 2.9726758003234863, |
| "learning_rate": 4.969851925549054e-06, |
| "loss": 0.6548, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.34231678486997635, |
| "grad_norm": 3.118309497833252, |
| "learning_rate": 4.969755260104327e-06, |
| "loss": 0.7293, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.34278959810874704, |
| "grad_norm": 3.373068332672119, |
| "learning_rate": 4.969658440878769e-06, |
| "loss": 0.6444, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.3432624113475177, |
| "grad_norm": 2.7157437801361084, |
| "learning_rate": 4.969561467878409e-06, |
| "loss": 0.642, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.3437352245862884, |
| "grad_norm": 2.58929705619812, |
| "learning_rate": 4.969464341109285e-06, |
| "loss": 0.6165, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.3442080378250591, |
| "grad_norm": 2.8811306953430176, |
| "learning_rate": 4.969367060577445e-06, |
| "loss": 0.7127, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.3446808510638298, |
| "grad_norm": 3.494358539581299, |
| "learning_rate": 4.969269626288946e-06, |
| "loss": 0.7103, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.34515366430260047, |
| "grad_norm": 2.9753928184509277, |
| "learning_rate": 4.969172038249855e-06, |
| "loss": 0.6911, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.34562647754137116, |
| "grad_norm": 3.2885913848876953, |
| "learning_rate": 4.969074296466247e-06, |
| "loss": 0.6968, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.34609929078014184, |
| "grad_norm": 2.7564568519592285, |
| "learning_rate": 4.968976400944211e-06, |
| "loss": 0.6843, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.34657210401891253, |
| "grad_norm": 2.9255006313323975, |
| "learning_rate": 4.96887835168984e-06, |
| "loss": 0.6024, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.3470449172576832, |
| "grad_norm": 3.1808290481567383, |
| "learning_rate": 4.968780148709239e-06, |
| "loss": 0.7377, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.3475177304964539, |
| "grad_norm": 2.956666946411133, |
| "learning_rate": 4.968681792008523e-06, |
| "loss": 0.65, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.3479905437352246, |
| "grad_norm": 2.9631855487823486, |
| "learning_rate": 4.9685832815938175e-06, |
| "loss": 0.677, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.3484633569739953, |
| "grad_norm": 2.501917600631714, |
| "learning_rate": 4.968484617471256e-06, |
| "loss": 0.6282, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.34893617021276596, |
| "grad_norm": 2.750779628753662, |
| "learning_rate": 4.968385799646981e-06, |
| "loss": 0.6507, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.34940898345153665, |
| "grad_norm": 2.872300624847412, |
| "learning_rate": 4.968286828127146e-06, |
| "loss": 0.5949, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.34988179669030733, |
| "grad_norm": 2.6316142082214355, |
| "learning_rate": 4.9681877029179124e-06, |
| "loss": 0.6328, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.350354609929078, |
| "grad_norm": 3.244364023208618, |
| "learning_rate": 4.968088424025454e-06, |
| "loss": 0.7393, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.3508274231678487, |
| "grad_norm": 2.620465040206909, |
| "learning_rate": 4.967988991455951e-06, |
| "loss": 0.6797, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.3513002364066194, |
| "grad_norm": 2.854513645172119, |
| "learning_rate": 4.967889405215596e-06, |
| "loss": 0.6368, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.3517730496453901, |
| "grad_norm": 2.579854726791382, |
| "learning_rate": 4.9677896653105886e-06, |
| "loss": 0.6489, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.35224586288416077, |
| "grad_norm": 3.0697381496429443, |
| "learning_rate": 4.96768977174714e-06, |
| "loss": 0.6313, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.35271867612293145, |
| "grad_norm": 3.369338035583496, |
| "learning_rate": 4.96758972453147e-06, |
| "loss": 0.7416, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.35319148936170214, |
| "grad_norm": 2.836221933364868, |
| "learning_rate": 4.967489523669807e-06, |
| "loss": 0.6422, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.3536643026004728, |
| "grad_norm": 2.929579496383667, |
| "learning_rate": 4.967389169168392e-06, |
| "loss": 0.6482, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.3541371158392435, |
| "grad_norm": 2.9243831634521484, |
| "learning_rate": 4.967288661033472e-06, |
| "loss": 0.5813, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.3546099290780142, |
| "grad_norm": 3.7555336952209473, |
| "learning_rate": 4.967187999271306e-06, |
| "loss": 0.6501, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.3550827423167849, |
| "grad_norm": 3.4279143810272217, |
| "learning_rate": 4.9670871838881615e-06, |
| "loss": 0.6326, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "grad_norm": 2.875066041946411, |
| "learning_rate": 4.9669862148903166e-06, |
| "loss": 0.664, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.35602836879432626, |
| "grad_norm": 3.130394697189331, |
| "learning_rate": 4.966885092284057e-06, |
| "loss": 0.706, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.35650118203309694, |
| "grad_norm": 2.9606287479400635, |
| "learning_rate": 4.96678381607568e-06, |
| "loss": 0.693, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.35697399527186763, |
| "grad_norm": 3.0584909915924072, |
| "learning_rate": 4.966682386271491e-06, |
| "loss": 0.6034, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.3574468085106383, |
| "grad_norm": 2.8215200901031494, |
| "learning_rate": 4.966580802877805e-06, |
| "loss": 0.6217, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.357919621749409, |
| "grad_norm": 2.7348055839538574, |
| "learning_rate": 4.966479065900949e-06, |
| "loss": 0.6194, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.3583924349881797, |
| "grad_norm": 3.2347466945648193, |
| "learning_rate": 4.966377175347257e-06, |
| "loss": 0.6377, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.3588652482269504, |
| "grad_norm": 3.311845302581787, |
| "learning_rate": 4.966275131223072e-06, |
| "loss": 0.6234, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.35933806146572106, |
| "grad_norm": 3.0384368896484375, |
| "learning_rate": 4.96617293353475e-06, |
| "loss": 0.609, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.35981087470449175, |
| "grad_norm": 3.516854763031006, |
| "learning_rate": 4.966070582288653e-06, |
| "loss": 0.6627, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.36028368794326243, |
| "grad_norm": 3.2425215244293213, |
| "learning_rate": 4.9659680774911534e-06, |
| "loss": 0.7355, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.3607565011820331, |
| "grad_norm": 3.2665750980377197, |
| "learning_rate": 4.965865419148636e-06, |
| "loss": 0.6787, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.3612293144208038, |
| "grad_norm": 2.729428291320801, |
| "learning_rate": 4.96576260726749e-06, |
| "loss": 0.6272, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.3617021276595745, |
| "grad_norm": 3.299969434738159, |
| "learning_rate": 4.965659641854119e-06, |
| "loss": 0.6552, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.3621749408983452, |
| "grad_norm": 2.7090916633605957, |
| "learning_rate": 4.965556522914934e-06, |
| "loss": 0.6661, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.36264775413711586, |
| "grad_norm": 2.488846778869629, |
| "learning_rate": 4.965453250456355e-06, |
| "loss": 0.5821, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.36312056737588655, |
| "grad_norm": 2.5267233848571777, |
| "learning_rate": 4.965349824484813e-06, |
| "loss": 0.5593, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.36359338061465724, |
| "grad_norm": 3.0646679401397705, |
| "learning_rate": 4.965246245006748e-06, |
| "loss": 0.6341, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.3640661938534279, |
| "grad_norm": 2.9877712726593018, |
| "learning_rate": 4.965142512028609e-06, |
| "loss": 0.7202, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.3645390070921986, |
| "grad_norm": 3.7494113445281982, |
| "learning_rate": 4.965038625556854e-06, |
| "loss": 0.7643, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.3650118203309693, |
| "grad_norm": 2.8382890224456787, |
| "learning_rate": 4.964934585597954e-06, |
| "loss": 0.6522, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.3654846335697399, |
| "grad_norm": 3.091655731201172, |
| "learning_rate": 4.9648303921583854e-06, |
| "loss": 0.7117, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.3659574468085106, |
| "grad_norm": 3.0608325004577637, |
| "learning_rate": 4.964726045244635e-06, |
| "loss": 0.6538, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.3664302600472813, |
| "grad_norm": 2.8492867946624756, |
| "learning_rate": 4.964621544863203e-06, |
| "loss": 0.6079, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.366903073286052, |
| "grad_norm": 3.0669894218444824, |
| "learning_rate": 4.964516891020594e-06, |
| "loss": 0.6223, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.36737588652482267, |
| "grad_norm": 3.089984893798828, |
| "learning_rate": 4.964412083723325e-06, |
| "loss": 0.671, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.36784869976359336, |
| "grad_norm": 2.905242443084717, |
| "learning_rate": 4.964307122977921e-06, |
| "loss": 0.62, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.36832151300236404, |
| "grad_norm": 3.954436779022217, |
| "learning_rate": 4.964202008790918e-06, |
| "loss": 0.6535, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.36879432624113473, |
| "grad_norm": 2.6026058197021484, |
| "learning_rate": 4.9640967411688615e-06, |
| "loss": 0.5865, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.3692671394799054, |
| "grad_norm": 2.9876346588134766, |
| "learning_rate": 4.963991320118306e-06, |
| "loss": 0.6698, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.3697399527186761, |
| "grad_norm": 2.9411263465881348, |
| "learning_rate": 4.963885745645815e-06, |
| "loss": 0.6173, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.3702127659574468, |
| "grad_norm": 2.5679805278778076, |
| "learning_rate": 4.963780017757962e-06, |
| "loss": 0.6285, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.3706855791962175, |
| "grad_norm": 3.3100640773773193, |
| "learning_rate": 4.963674136461332e-06, |
| "loss": 0.5968, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.37115839243498816, |
| "grad_norm": 3.1293699741363525, |
| "learning_rate": 4.963568101762515e-06, |
| "loss": 0.697, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.37163120567375885, |
| "grad_norm": 3.043853759765625, |
| "learning_rate": 4.963461913668115e-06, |
| "loss": 0.5881, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.37210401891252953, |
| "grad_norm": 3.07351016998291, |
| "learning_rate": 4.963355572184744e-06, |
| "loss": 0.6307, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.3725768321513002, |
| "grad_norm": 2.7381317615509033, |
| "learning_rate": 4.9632490773190225e-06, |
| "loss": 0.716, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.3730496453900709, |
| "grad_norm": 2.892221450805664, |
| "learning_rate": 4.963142429077582e-06, |
| "loss": 0.6867, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.3735224586288416, |
| "grad_norm": 3.133122205734253, |
| "learning_rate": 4.963035627467064e-06, |
| "loss": 0.659, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.3739952718676123, |
| "grad_norm": 3.032599925994873, |
| "learning_rate": 4.962928672494116e-06, |
| "loss": 0.6848, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.37446808510638296, |
| "grad_norm": 3.0076355934143066, |
| "learning_rate": 4.9628215641654e-06, |
| "loss": 0.6549, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.37494089834515365, |
| "grad_norm": 2.8904454708099365, |
| "learning_rate": 4.962714302487585e-06, |
| "loss": 0.6484, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.37541371158392434, |
| "grad_norm": 2.881364107131958, |
| "learning_rate": 4.9626068874673486e-06, |
| "loss": 0.721, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.375886524822695, |
| "grad_norm": 3.11668062210083, |
| "learning_rate": 4.962499319111379e-06, |
| "loss": 0.7824, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.3763593380614657, |
| "grad_norm": 2.9201436042785645, |
| "learning_rate": 4.962391597426374e-06, |
| "loss": 0.6911, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.3768321513002364, |
| "grad_norm": 2.926598072052002, |
| "learning_rate": 4.962283722419043e-06, |
| "loss": 0.6715, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.3773049645390071, |
| "grad_norm": 2.7267675399780273, |
| "learning_rate": 4.962175694096101e-06, |
| "loss": 0.6111, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.37777777777777777, |
| "grad_norm": 3.194031000137329, |
| "learning_rate": 4.962067512464275e-06, |
| "loss": 0.6558, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.37825059101654845, |
| "grad_norm": 2.6249136924743652, |
| "learning_rate": 4.9619591775303e-06, |
| "loss": 0.6166, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.37872340425531914, |
| "grad_norm": 2.6356167793273926, |
| "learning_rate": 4.961850689300923e-06, |
| "loss": 0.6112, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.3791962174940898, |
| "grad_norm": 3.030724287033081, |
| "learning_rate": 4.961742047782898e-06, |
| "loss": 0.6511, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.3796690307328605, |
| "grad_norm": 3.4987757205963135, |
| "learning_rate": 4.96163325298299e-06, |
| "loss": 0.5888, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.3801418439716312, |
| "grad_norm": 3.0371780395507812, |
| "learning_rate": 4.961524304907974e-06, |
| "loss": 0.6385, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.3806146572104019, |
| "grad_norm": 3.302570104598999, |
| "learning_rate": 4.961415203564632e-06, |
| "loss": 0.6515, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.38108747044917257, |
| "grad_norm": 2.7597038745880127, |
| "learning_rate": 4.961305948959759e-06, |
| "loss": 0.6126, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.38156028368794326, |
| "grad_norm": 2.789811849594116, |
| "learning_rate": 4.9611965411001575e-06, |
| "loss": 0.6601, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.38203309692671394, |
| "grad_norm": 3.0403921604156494, |
| "learning_rate": 4.961086979992639e-06, |
| "loss": 0.6947, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.38250591016548463, |
| "grad_norm": 3.2139980792999268, |
| "learning_rate": 4.960977265644026e-06, |
| "loss": 0.6876, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.3829787234042553, |
| "grad_norm": 2.918515205383301, |
| "learning_rate": 4.960867398061149e-06, |
| "loss": 0.5997, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.383451536643026, |
| "grad_norm": 3.197636604309082, |
| "learning_rate": 4.9607573772508495e-06, |
| "loss": 0.5754, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.3839243498817967, |
| "grad_norm": 2.8848466873168945, |
| "learning_rate": 4.960647203219979e-06, |
| "loss": 0.6424, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.3843971631205674, |
| "grad_norm": 3.4810187816619873, |
| "learning_rate": 4.960536875975397e-06, |
| "loss": 0.6851, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.38486997635933806, |
| "grad_norm": 3.713934898376465, |
| "learning_rate": 4.960426395523972e-06, |
| "loss": 0.6122, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.38534278959810875, |
| "grad_norm": 2.862600803375244, |
| "learning_rate": 4.960315761872585e-06, |
| "loss": 0.6493, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.38581560283687943, |
| "grad_norm": 3.133882522583008, |
| "learning_rate": 4.960204975028123e-06, |
| "loss": 0.7535, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.3862884160756501, |
| "grad_norm": 3.1526732444763184, |
| "learning_rate": 4.960094034997485e-06, |
| "loss": 0.6512, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.3867612293144208, |
| "grad_norm": 2.7213544845581055, |
| "learning_rate": 4.959982941787579e-06, |
| "loss": 0.6121, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.3872340425531915, |
| "grad_norm": 3.4935851097106934, |
| "learning_rate": 4.9598716954053214e-06, |
| "loss": 0.7852, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.3877068557919622, |
| "grad_norm": 2.691016435623169, |
| "learning_rate": 4.9597602958576395e-06, |
| "loss": 0.6861, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.38817966903073287, |
| "grad_norm": 2.8621015548706055, |
| "learning_rate": 4.959648743151469e-06, |
| "loss": 0.6262, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.38865248226950355, |
| "grad_norm": 3.3887462615966797, |
| "learning_rate": 4.959537037293758e-06, |
| "loss": 0.7103, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.38912529550827424, |
| "grad_norm": 2.7565438747406006, |
| "learning_rate": 4.95942517829146e-06, |
| "loss": 0.6471, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.3895981087470449, |
| "grad_norm": 2.7920358180999756, |
| "learning_rate": 4.959313166151541e-06, |
| "loss": 0.6239, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.3900709219858156, |
| "grad_norm": 3.18904185295105, |
| "learning_rate": 4.959201000880973e-06, |
| "loss": 0.7461, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.3905437352245863, |
| "grad_norm": 2.727872371673584, |
| "learning_rate": 4.959088682486743e-06, |
| "loss": 0.6333, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.391016548463357, |
| "grad_norm": 2.906378746032715, |
| "learning_rate": 4.958976210975844e-06, |
| "loss": 0.7547, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.39148936170212767, |
| "grad_norm": 2.96482515335083, |
| "learning_rate": 4.958863586355278e-06, |
| "loss": 0.6312, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.39196217494089836, |
| "grad_norm": 3.2890889644622803, |
| "learning_rate": 4.958750808632059e-06, |
| "loss": 0.6943, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.39243498817966904, |
| "grad_norm": 2.7004311084747314, |
| "learning_rate": 4.958637877813207e-06, |
| "loss": 0.5918, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.39290780141843973, |
| "grad_norm": 2.7487950325012207, |
| "learning_rate": 4.9585247939057566e-06, |
| "loss": 0.6201, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.3933806146572104, |
| "grad_norm": 2.7873897552490234, |
| "learning_rate": 4.958411556916747e-06, |
| "loss": 0.6268, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.3938534278959811, |
| "grad_norm": 2.8501343727111816, |
| "learning_rate": 4.958298166853229e-06, |
| "loss": 0.7119, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.3943262411347518, |
| "grad_norm": 3.0391547679901123, |
| "learning_rate": 4.958184623722265e-06, |
| "loss": 0.6375, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.3947990543735225, |
| "grad_norm": 2.850520133972168, |
| "learning_rate": 4.958070927530922e-06, |
| "loss": 0.5962, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.39527186761229316, |
| "grad_norm": 3.351914644241333, |
| "learning_rate": 4.957957078286281e-06, |
| "loss": 0.7247, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.39574468085106385, |
| "grad_norm": 2.9559543132781982, |
| "learning_rate": 4.957843075995431e-06, |
| "loss": 0.6571, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.39621749408983453, |
| "grad_norm": 3.225785255432129, |
| "learning_rate": 4.95772892066547e-06, |
| "loss": 0.7074, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.3966903073286052, |
| "grad_norm": 2.7842373847961426, |
| "learning_rate": 4.957614612303505e-06, |
| "loss": 0.6469, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.3971631205673759, |
| "grad_norm": 4.249724864959717, |
| "learning_rate": 4.957500150916655e-06, |
| "loss": 0.741, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.3976359338061466, |
| "grad_norm": 3.138221263885498, |
| "learning_rate": 4.957385536512046e-06, |
| "loss": 0.6676, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.3981087470449173, |
| "grad_norm": 3.456423759460449, |
| "learning_rate": 4.957270769096816e-06, |
| "loss": 0.6877, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.39858156028368796, |
| "grad_norm": 2.8676278591156006, |
| "learning_rate": 4.957155848678109e-06, |
| "loss": 0.5986, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.39905437352245865, |
| "grad_norm": 2.705324411392212, |
| "learning_rate": 4.957040775263082e-06, |
| "loss": 0.6356, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.39952718676122934, |
| "grad_norm": 3.0767486095428467, |
| "learning_rate": 4.9569255488589e-06, |
| "loss": 0.6844, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 2.7787704467773438, |
| "learning_rate": 4.956810169472736e-06, |
| "loss": 0.6641, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.4004728132387707, |
| "grad_norm": 2.584277868270874, |
| "learning_rate": 4.956694637111777e-06, |
| "loss": 0.6256, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.4009456264775414, |
| "grad_norm": 2.751641273498535, |
| "learning_rate": 4.956578951783215e-06, |
| "loss": 0.5954, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.4014184397163121, |
| "grad_norm": 3.0181658267974854, |
| "learning_rate": 4.956463113494253e-06, |
| "loss": 0.6569, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.40189125295508277, |
| "grad_norm": 3.0933220386505127, |
| "learning_rate": 4.956347122252104e-06, |
| "loss": 0.6248, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.40236406619385345, |
| "grad_norm": 3.3767428398132324, |
| "learning_rate": 4.956230978063991e-06, |
| "loss": 0.719, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.40283687943262414, |
| "grad_norm": 3.7666573524475098, |
| "learning_rate": 4.956114680937145e-06, |
| "loss": 0.6467, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.4033096926713948, |
| "grad_norm": 2.9836843013763428, |
| "learning_rate": 4.955998230878808e-06, |
| "loss": 0.6993, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.4037825059101655, |
| "grad_norm": 2.981497049331665, |
| "learning_rate": 4.955881627896229e-06, |
| "loss": 0.6578, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.40425531914893614, |
| "grad_norm": 3.1369056701660156, |
| "learning_rate": 4.955764871996672e-06, |
| "loss": 0.6763, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.40472813238770683, |
| "grad_norm": 2.7675817012786865, |
| "learning_rate": 4.9556479631874036e-06, |
| "loss": 0.6488, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.4052009456264775, |
| "grad_norm": 3.035334825515747, |
| "learning_rate": 4.9555309014757034e-06, |
| "loss": 0.7076, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.4056737588652482, |
| "grad_norm": 3.493704319000244, |
| "learning_rate": 4.955413686868862e-06, |
| "loss": 0.6773, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.4061465721040189, |
| "grad_norm": 3.245487928390503, |
| "learning_rate": 4.9552963193741765e-06, |
| "loss": 0.6915, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.4066193853427896, |
| "grad_norm": 3.189969539642334, |
| "learning_rate": 4.955178798998956e-06, |
| "loss": 0.7318, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.40709219858156026, |
| "grad_norm": 2.7987146377563477, |
| "learning_rate": 4.955061125750517e-06, |
| "loss": 0.6162, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.40756501182033095, |
| "grad_norm": 3.020118474960327, |
| "learning_rate": 4.954943299636187e-06, |
| "loss": 0.6678, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.40803782505910163, |
| "grad_norm": 2.715463876724243, |
| "learning_rate": 4.954825320663302e-06, |
| "loss": 0.668, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.4085106382978723, |
| "grad_norm": 2.595050096511841, |
| "learning_rate": 4.9547071888392085e-06, |
| "loss": 0.6557, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.408983451536643, |
| "grad_norm": 3.131596088409424, |
| "learning_rate": 4.954588904171261e-06, |
| "loss": 0.6548, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.4094562647754137, |
| "grad_norm": 2.5742313861846924, |
| "learning_rate": 4.954470466666827e-06, |
| "loss": 0.6592, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.4099290780141844, |
| "grad_norm": 2.8612802028656006, |
| "learning_rate": 4.9543518763332785e-06, |
| "loss": 0.5391, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.41040189125295506, |
| "grad_norm": 2.8973186016082764, |
| "learning_rate": 4.954233133178001e-06, |
| "loss": 0.6649, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.41087470449172575, |
| "grad_norm": 2.802525043487549, |
| "learning_rate": 4.954114237208388e-06, |
| "loss": 0.6212, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.41134751773049644, |
| "grad_norm": 2.5919506549835205, |
| "learning_rate": 4.953995188431843e-06, |
| "loss": 0.6596, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.4118203309692671, |
| "grad_norm": 3.139169454574585, |
| "learning_rate": 4.953875986855777e-06, |
| "loss": 0.6799, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.4122931442080378, |
| "grad_norm": 3.99727725982666, |
| "learning_rate": 4.953756632487614e-06, |
| "loss": 0.6519, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.4127659574468085, |
| "grad_norm": 3.238706350326538, |
| "learning_rate": 4.953637125334784e-06, |
| "loss": 0.7361, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.4132387706855792, |
| "grad_norm": 2.780019998550415, |
| "learning_rate": 4.9535174654047295e-06, |
| "loss": 0.6406, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.41371158392434987, |
| "grad_norm": 2.7629551887512207, |
| "learning_rate": 4.953397652704901e-06, |
| "loss": 0.6131, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.41418439716312055, |
| "grad_norm": 2.8008246421813965, |
| "learning_rate": 4.9532776872427585e-06, |
| "loss": 0.6464, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.41465721040189124, |
| "grad_norm": 3.0970115661621094, |
| "learning_rate": 4.953157569025772e-06, |
| "loss": 0.7066, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.4151300236406619, |
| "grad_norm": 2.8375589847564697, |
| "learning_rate": 4.9530372980614195e-06, |
| "loss": 0.6551, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.4156028368794326, |
| "grad_norm": 2.718843936920166, |
| "learning_rate": 4.952916874357191e-06, |
| "loss": 0.5947, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.4160756501182033, |
| "grad_norm": 2.7104697227478027, |
| "learning_rate": 4.952796297920585e-06, |
| "loss": 0.6708, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.416548463356974, |
| "grad_norm": 2.8223445415496826, |
| "learning_rate": 4.952675568759108e-06, |
| "loss": 0.6214, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.41702127659574467, |
| "grad_norm": 2.6598153114318848, |
| "learning_rate": 4.952554686880279e-06, |
| "loss": 0.6116, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.41749408983451536, |
| "grad_norm": 2.8639824390411377, |
| "learning_rate": 4.952433652291623e-06, |
| "loss": 0.5971, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.41796690307328604, |
| "grad_norm": 2.9578304290771484, |
| "learning_rate": 4.952312465000677e-06, |
| "loss": 0.6785, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.41843971631205673, |
| "grad_norm": 2.872144937515259, |
| "learning_rate": 4.952191125014987e-06, |
| "loss": 0.6772, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.4189125295508274, |
| "grad_norm": 2.7513675689697266, |
| "learning_rate": 4.952069632342108e-06, |
| "loss": 0.702, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.4193853427895981, |
| "grad_norm": 2.9275078773498535, |
| "learning_rate": 4.951947986989606e-06, |
| "loss": 0.589, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.4198581560283688, |
| "grad_norm": 2.740549325942993, |
| "learning_rate": 4.951826188965053e-06, |
| "loss": 0.5942, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.4203309692671395, |
| "grad_norm": 2.92452073097229, |
| "learning_rate": 4.951704238276035e-06, |
| "loss": 0.6819, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.42080378250591016, |
| "grad_norm": 2.842491865158081, |
| "learning_rate": 4.951582134930144e-06, |
| "loss": 0.6304, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.42127659574468085, |
| "grad_norm": 2.613478422164917, |
| "learning_rate": 4.951459878934983e-06, |
| "loss": 0.6912, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.42174940898345153, |
| "grad_norm": 3.2408607006073, |
| "learning_rate": 4.951337470298165e-06, |
| "loss": 0.6755, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.4222222222222222, |
| "grad_norm": 3.1022439002990723, |
| "learning_rate": 4.9512149090273125e-06, |
| "loss": 0.6138, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.4226950354609929, |
| "grad_norm": 2.6418895721435547, |
| "learning_rate": 4.951092195130055e-06, |
| "loss": 0.639, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.4231678486997636, |
| "grad_norm": 3.010744333267212, |
| "learning_rate": 4.950969328614035e-06, |
| "loss": 0.7102, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.4236406619385343, |
| "grad_norm": 2.673292636871338, |
| "learning_rate": 4.950846309486901e-06, |
| "loss": 0.5676, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.42411347517730497, |
| "grad_norm": 3.6974737644195557, |
| "learning_rate": 4.950723137756314e-06, |
| "loss": 0.5722, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.42458628841607565, |
| "grad_norm": 3.69028902053833, |
| "learning_rate": 4.9505998134299435e-06, |
| "loss": 0.6337, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.42505910165484634, |
| "grad_norm": 3.2136125564575195, |
| "learning_rate": 4.950476336515469e-06, |
| "loss": 0.6469, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "grad_norm": 2.7396016120910645, |
| "learning_rate": 4.950352707020577e-06, |
| "loss": 0.6656, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4260047281323877, |
| "grad_norm": 2.825416088104248, |
| "learning_rate": 4.950228924952967e-06, |
| "loss": 0.6298, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.4264775413711584, |
| "grad_norm": 3.401658535003662, |
| "learning_rate": 4.950104990320345e-06, |
| "loss": 0.778, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.4269503546099291, |
| "grad_norm": 2.7002272605895996, |
| "learning_rate": 4.9499809031304294e-06, |
| "loss": 0.6536, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.42742316784869977, |
| "grad_norm": 2.62386417388916, |
| "learning_rate": 4.949856663390945e-06, |
| "loss": 0.6629, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.42789598108747046, |
| "grad_norm": 2.584247589111328, |
| "learning_rate": 4.94973227110963e-06, |
| "loss": 0.5813, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.42836879432624114, |
| "grad_norm": 3.4365768432617188, |
| "learning_rate": 4.9496077262942265e-06, |
| "loss": 0.7648, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.42884160756501183, |
| "grad_norm": 2.8993639945983887, |
| "learning_rate": 4.949483028952492e-06, |
| "loss": 0.6696, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.4293144208037825, |
| "grad_norm": 2.922809362411499, |
| "learning_rate": 4.94935817909219e-06, |
| "loss": 0.6892, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.4297872340425532, |
| "grad_norm": 2.85478138923645, |
| "learning_rate": 4.9492331767210944e-06, |
| "loss": 0.536, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.4302600472813239, |
| "grad_norm": 2.8639259338378906, |
| "learning_rate": 4.949108021846988e-06, |
| "loss": 0.634, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.4307328605200946, |
| "grad_norm": 3.0533697605133057, |
| "learning_rate": 4.948982714477664e-06, |
| "loss": 0.6318, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.43120567375886526, |
| "grad_norm": 2.331674814224243, |
| "learning_rate": 4.9488572546209255e-06, |
| "loss": 0.6562, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.43167848699763595, |
| "grad_norm": 3.0154623985290527, |
| "learning_rate": 4.9487316422845835e-06, |
| "loss": 0.6675, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.43215130023640663, |
| "grad_norm": 2.7354514598846436, |
| "learning_rate": 4.948605877476459e-06, |
| "loss": 0.6012, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.4326241134751773, |
| "grad_norm": 2.863736629486084, |
| "learning_rate": 4.948479960204383e-06, |
| "loss": 0.6062, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.433096926713948, |
| "grad_norm": 3.01998233795166, |
| "learning_rate": 4.948353890476197e-06, |
| "loss": 0.6749, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.4335697399527187, |
| "grad_norm": 2.7550456523895264, |
| "learning_rate": 4.94822766829975e-06, |
| "loss": 0.6507, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.4340425531914894, |
| "grad_norm": 3.370572805404663, |
| "learning_rate": 4.948101293682901e-06, |
| "loss": 0.714, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.43451536643026006, |
| "grad_norm": 2.9736790657043457, |
| "learning_rate": 4.947974766633519e-06, |
| "loss": 0.729, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.43498817966903075, |
| "grad_norm": 3.1036548614501953, |
| "learning_rate": 4.947848087159483e-06, |
| "loss": 0.7547, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.43546099290780144, |
| "grad_norm": 2.895094871520996, |
| "learning_rate": 4.947721255268679e-06, |
| "loss": 0.6089, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.4359338061465721, |
| "grad_norm": 2.798476219177246, |
| "learning_rate": 4.947594270969005e-06, |
| "loss": 0.5432, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.4364066193853428, |
| "grad_norm": 2.7675702571868896, |
| "learning_rate": 4.94746713426837e-06, |
| "loss": 0.5693, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.4368794326241135, |
| "grad_norm": 2.6851553916931152, |
| "learning_rate": 4.947339845174687e-06, |
| "loss": 0.6503, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.4373522458628842, |
| "grad_norm": 2.909635543823242, |
| "learning_rate": 4.947212403695883e-06, |
| "loss": 0.6494, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.43782505910165487, |
| "grad_norm": 2.604526996612549, |
| "learning_rate": 4.947084809839894e-06, |
| "loss": 0.6349, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.43829787234042555, |
| "grad_norm": 3.118149518966675, |
| "learning_rate": 4.946957063614664e-06, |
| "loss": 0.6219, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.43877068557919624, |
| "grad_norm": 2.7452616691589355, |
| "learning_rate": 4.9468291650281465e-06, |
| "loss": 0.6096, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.4392434988179669, |
| "grad_norm": 3.30098819732666, |
| "learning_rate": 4.946701114088307e-06, |
| "loss": 0.6277, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.4397163120567376, |
| "grad_norm": 2.789482593536377, |
| "learning_rate": 4.946572910803116e-06, |
| "loss": 0.7, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.4401891252955083, |
| "grad_norm": 2.7283935546875, |
| "learning_rate": 4.946444555180559e-06, |
| "loss": 0.5375, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.440661938534279, |
| "grad_norm": 3.101304054260254, |
| "learning_rate": 4.946316047228627e-06, |
| "loss": 0.6131, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.44113475177304967, |
| "grad_norm": 3.573908805847168, |
| "learning_rate": 4.946187386955321e-06, |
| "loss": 0.7073, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.44160756501182036, |
| "grad_norm": 3.214979648590088, |
| "learning_rate": 4.946058574368653e-06, |
| "loss": 0.6508, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.44208037825059104, |
| "grad_norm": 3.145082712173462, |
| "learning_rate": 4.945929609476643e-06, |
| "loss": 0.64, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.4425531914893617, |
| "grad_norm": 2.991780996322632, |
| "learning_rate": 4.945800492287321e-06, |
| "loss": 0.6315, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.44302600472813236, |
| "grad_norm": 3.2441139221191406, |
| "learning_rate": 4.945671222808727e-06, |
| "loss": 0.7144, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.44349881796690305, |
| "grad_norm": 2.9397029876708984, |
| "learning_rate": 4.94554180104891e-06, |
| "loss": 0.6818, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.44397163120567373, |
| "grad_norm": 3.2471461296081543, |
| "learning_rate": 4.945412227015929e-06, |
| "loss": 0.6921, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 3.0882487297058105, |
| "learning_rate": 4.945282500717851e-06, |
| "loss": 0.718, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.4449172576832151, |
| "grad_norm": 2.6035783290863037, |
| "learning_rate": 4.945152622162753e-06, |
| "loss": 0.621, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.4453900709219858, |
| "grad_norm": 2.83659029006958, |
| "learning_rate": 4.945022591358724e-06, |
| "loss": 0.6403, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.4458628841607565, |
| "grad_norm": 2.824463129043579, |
| "learning_rate": 4.944892408313859e-06, |
| "loss": 0.6594, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.44633569739952716, |
| "grad_norm": 2.753735065460205, |
| "learning_rate": 4.9447620730362645e-06, |
| "loss": 0.6116, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.44680851063829785, |
| "grad_norm": 3.0659725666046143, |
| "learning_rate": 4.944631585534056e-06, |
| "loss": 0.5983, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.44728132387706854, |
| "grad_norm": 2.969113349914551, |
| "learning_rate": 4.944500945815357e-06, |
| "loss": 0.6859, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.4477541371158392, |
| "grad_norm": 2.810303211212158, |
| "learning_rate": 4.944370153888303e-06, |
| "loss": 0.7025, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.4482269503546099, |
| "grad_norm": 3.027721643447876, |
| "learning_rate": 4.944239209761038e-06, |
| "loss": 0.7268, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.4486997635933806, |
| "grad_norm": 2.661503314971924, |
| "learning_rate": 4.944108113441716e-06, |
| "loss": 0.6702, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.4491725768321513, |
| "grad_norm": 2.738591432571411, |
| "learning_rate": 4.943976864938498e-06, |
| "loss": 0.6728, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.44964539007092197, |
| "grad_norm": 3.447505474090576, |
| "learning_rate": 4.943845464259557e-06, |
| "loss": 0.6586, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.45011820330969265, |
| "grad_norm": 3.0968854427337646, |
| "learning_rate": 4.943713911413075e-06, |
| "loss": 0.7666, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.45059101654846334, |
| "grad_norm": 2.4113779067993164, |
| "learning_rate": 4.943582206407244e-06, |
| "loss": 0.6173, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.451063829787234, |
| "grad_norm": 2.6357979774475098, |
| "learning_rate": 4.943450349250263e-06, |
| "loss": 0.5589, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.4515366430260047, |
| "grad_norm": 2.9182233810424805, |
| "learning_rate": 4.9433183399503425e-06, |
| "loss": 0.6252, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.4520094562647754, |
| "grad_norm": 2.832740306854248, |
| "learning_rate": 4.943186178515703e-06, |
| "loss": 0.6882, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.4524822695035461, |
| "grad_norm": 2.9508981704711914, |
| "learning_rate": 4.943053864954574e-06, |
| "loss": 0.5722, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.4529550827423168, |
| "grad_norm": 3.044729471206665, |
| "learning_rate": 4.9429213992751925e-06, |
| "loss": 0.6772, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.45342789598108746, |
| "grad_norm": 2.606003522872925, |
| "learning_rate": 4.9427887814858075e-06, |
| "loss": 0.6445, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.45390070921985815, |
| "grad_norm": 2.4634225368499756, |
| "learning_rate": 4.942656011594676e-06, |
| "loss": 0.6151, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.45437352245862883, |
| "grad_norm": 2.8872334957122803, |
| "learning_rate": 4.942523089610066e-06, |
| "loss": 0.6255, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.4548463356973995, |
| "grad_norm": 2.870605707168579, |
| "learning_rate": 4.942390015540253e-06, |
| "loss": 0.7481, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.4553191489361702, |
| "grad_norm": 2.952680826187134, |
| "learning_rate": 4.942256789393524e-06, |
| "loss": 0.5556, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.4557919621749409, |
| "grad_norm": 2.623680353164673, |
| "learning_rate": 4.9421234111781725e-06, |
| "loss": 0.6115, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.4562647754137116, |
| "grad_norm": 2.6933600902557373, |
| "learning_rate": 4.941989880902505e-06, |
| "loss": 0.6102, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.45673758865248226, |
| "grad_norm": 2.6047189235687256, |
| "learning_rate": 4.941856198574836e-06, |
| "loss": 0.612, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.45721040189125295, |
| "grad_norm": 2.779186725616455, |
| "learning_rate": 4.9417223642034885e-06, |
| "loss": 0.5424, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.45768321513002364, |
| "grad_norm": 2.6177165508270264, |
| "learning_rate": 4.941588377796795e-06, |
| "loss": 0.4661, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.4581560283687943, |
| "grad_norm": 2.959676742553711, |
| "learning_rate": 4.941454239363101e-06, |
| "loss": 0.6966, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.458628841607565, |
| "grad_norm": 2.9788379669189453, |
| "learning_rate": 4.941319948910756e-06, |
| "loss": 0.6181, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.4591016548463357, |
| "grad_norm": 4.642750263214111, |
| "learning_rate": 4.941185506448122e-06, |
| "loss": 0.5602, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.4595744680851064, |
| "grad_norm": 2.793002128601074, |
| "learning_rate": 4.941050911983572e-06, |
| "loss": 0.602, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.46004728132387707, |
| "grad_norm": 2.6833035945892334, |
| "learning_rate": 4.9409161655254845e-06, |
| "loss": 0.5549, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.46052009456264775, |
| "grad_norm": 3.905032157897949, |
| "learning_rate": 4.94078126708225e-06, |
| "loss": 0.6335, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.46099290780141844, |
| "grad_norm": 2.922609329223633, |
| "learning_rate": 4.94064621666227e-06, |
| "loss": 0.5839, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.4614657210401891, |
| "grad_norm": 2.8277416229248047, |
| "learning_rate": 4.940511014273952e-06, |
| "loss": 0.629, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.4619385342789598, |
| "grad_norm": 3.07511043548584, |
| "learning_rate": 4.940375659925714e-06, |
| "loss": 0.7058, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.4624113475177305, |
| "grad_norm": 3.65043044090271, |
| "learning_rate": 4.940240153625984e-06, |
| "loss": 0.7174, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.4628841607565012, |
| "grad_norm": 2.755167245864868, |
| "learning_rate": 4.9401044953832e-06, |
| "loss": 0.6548, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.46335697399527187, |
| "grad_norm": 2.9881057739257812, |
| "learning_rate": 4.939968685205808e-06, |
| "loss": 0.6245, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.46382978723404256, |
| "grad_norm": 2.9484212398529053, |
| "learning_rate": 4.939832723102266e-06, |
| "loss": 0.655, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.46430260047281324, |
| "grad_norm": 2.898918628692627, |
| "learning_rate": 4.939696609081038e-06, |
| "loss": 0.6178, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.46477541371158393, |
| "grad_norm": 2.7052435874938965, |
| "learning_rate": 4.9395603431506e-06, |
| "loss": 0.6393, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.4652482269503546, |
| "grad_norm": 2.5610013008117676, |
| "learning_rate": 4.939423925319436e-06, |
| "loss": 0.4847, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.4657210401891253, |
| "grad_norm": 3.229083299636841, |
| "learning_rate": 4.939287355596042e-06, |
| "loss": 0.6473, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.466193853427896, |
| "grad_norm": 2.907097816467285, |
| "learning_rate": 4.9391506339889195e-06, |
| "loss": 0.652, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.4666666666666667, |
| "grad_norm": 2.6929478645324707, |
| "learning_rate": 4.939013760506582e-06, |
| "loss": 0.6175, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.46713947990543736, |
| "grad_norm": 3.414813280105591, |
| "learning_rate": 4.938876735157554e-06, |
| "loss": 0.7597, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.46761229314420805, |
| "grad_norm": 3.297360420227051, |
| "learning_rate": 4.938739557950365e-06, |
| "loss": 0.6824, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.46808510638297873, |
| "grad_norm": 3.083155393600464, |
| "learning_rate": 4.938602228893557e-06, |
| "loss": 0.6505, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.4685579196217494, |
| "grad_norm": 2.9781153202056885, |
| "learning_rate": 4.938464747995681e-06, |
| "loss": 0.666, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.4690307328605201, |
| "grad_norm": 3.1494534015655518, |
| "learning_rate": 4.9383271152652975e-06, |
| "loss": 0.6422, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.4695035460992908, |
| "grad_norm": 2.547868490219116, |
| "learning_rate": 4.938189330710976e-06, |
| "loss": 0.5766, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.4699763593380615, |
| "grad_norm": 2.684736967086792, |
| "learning_rate": 4.938051394341297e-06, |
| "loss": 0.6407, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.47044917257683216, |
| "grad_norm": 2.9619693756103516, |
| "learning_rate": 4.937913306164847e-06, |
| "loss": 0.6936, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.47092198581560285, |
| "grad_norm": 2.9698498249053955, |
| "learning_rate": 4.937775066190227e-06, |
| "loss": 0.6464, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.47139479905437354, |
| "grad_norm": 3.121049642562866, |
| "learning_rate": 4.937636674426042e-06, |
| "loss": 0.6383, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.4718676122931442, |
| "grad_norm": 3.113672971725464, |
| "learning_rate": 4.93749813088091e-06, |
| "loss": 0.6892, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.4723404255319149, |
| "grad_norm": 3.126113176345825, |
| "learning_rate": 4.937359435563458e-06, |
| "loss": 0.6728, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.4728132387706856, |
| "grad_norm": 3.353966236114502, |
| "learning_rate": 4.937220588482321e-06, |
| "loss": 0.6041, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.4732860520094563, |
| "grad_norm": 2.8860628604888916, |
| "learning_rate": 4.937081589646144e-06, |
| "loss": 0.6798, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.47375886524822697, |
| "grad_norm": 3.0510590076446533, |
| "learning_rate": 4.936942439063584e-06, |
| "loss": 0.5841, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.47423167848699765, |
| "grad_norm": 2.6998369693756104, |
| "learning_rate": 4.936803136743303e-06, |
| "loss": 0.6403, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.47470449172576834, |
| "grad_norm": 2.875347137451172, |
| "learning_rate": 4.9366636826939765e-06, |
| "loss": 0.5811, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.475177304964539, |
| "grad_norm": 2.9122262001037598, |
| "learning_rate": 4.936524076924287e-06, |
| "loss": 0.6852, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.4756501182033097, |
| "grad_norm": 2.5167057514190674, |
| "learning_rate": 4.9363843194429265e-06, |
| "loss": 0.5367, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.4761229314420804, |
| "grad_norm": 2.5745551586151123, |
| "learning_rate": 4.9362444102585985e-06, |
| "loss": 0.6241, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.4765957446808511, |
| "grad_norm": 2.5024216175079346, |
| "learning_rate": 4.9361043493800125e-06, |
| "loss": 0.6133, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.47706855791962177, |
| "grad_norm": 2.7281384468078613, |
| "learning_rate": 4.935964136815892e-06, |
| "loss": 0.6834, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.47754137115839246, |
| "grad_norm": 3.0118913650512695, |
| "learning_rate": 4.935823772574965e-06, |
| "loss": 0.6922, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.47801418439716314, |
| "grad_norm": 3.016216993331909, |
| "learning_rate": 4.935683256665973e-06, |
| "loss": 0.6653, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.47848699763593383, |
| "grad_norm": 2.9526784420013428, |
| "learning_rate": 4.9355425890976636e-06, |
| "loss": 0.6423, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.4789598108747045, |
| "grad_norm": 6.222797393798828, |
| "learning_rate": 4.9354017698787985e-06, |
| "loss": 0.5884, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.4794326241134752, |
| "grad_norm": 2.6553597450256348, |
| "learning_rate": 4.935260799018143e-06, |
| "loss": 0.6624, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.4799054373522459, |
| "grad_norm": 3.0942065715789795, |
| "learning_rate": 4.935119676524475e-06, |
| "loss": 0.6623, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.4803782505910166, |
| "grad_norm": 2.626359224319458, |
| "learning_rate": 4.934978402406585e-06, |
| "loss": 0.6195, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.4808510638297872, |
| "grad_norm": 2.7954699993133545, |
| "learning_rate": 4.934836976673265e-06, |
| "loss": 0.5545, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.4813238770685579, |
| "grad_norm": 2.913557291030884, |
| "learning_rate": 4.934695399333324e-06, |
| "loss": 0.6288, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.4817966903073286, |
| "grad_norm": 3.1043739318847656, |
| "learning_rate": 4.9345536703955746e-06, |
| "loss": 0.6771, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.48226950354609927, |
| "grad_norm": 2.789357900619507, |
| "learning_rate": 4.934411789868845e-06, |
| "loss": 0.6227, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.48274231678486995, |
| "grad_norm": 2.480609655380249, |
| "learning_rate": 4.934269757761967e-06, |
| "loss": 0.5779, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.48321513002364064, |
| "grad_norm": 2.7946252822875977, |
| "learning_rate": 4.934127574083785e-06, |
| "loss": 0.6166, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.4836879432624113, |
| "grad_norm": 3.0670509338378906, |
| "learning_rate": 4.933985238843153e-06, |
| "loss": 0.7766, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.484160756501182, |
| "grad_norm": 2.8567559719085693, |
| "learning_rate": 4.933842752048932e-06, |
| "loss": 0.5088, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.4846335697399527, |
| "grad_norm": 2.5674657821655273, |
| "learning_rate": 4.933700113709996e-06, |
| "loss": 0.6036, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.4851063829787234, |
| "grad_norm": 2.782339096069336, |
| "learning_rate": 4.933557323835224e-06, |
| "loss": 0.5335, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.48557919621749407, |
| "grad_norm": 2.6334071159362793, |
| "learning_rate": 4.93341438243351e-06, |
| "loss": 0.6327, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.48605200945626476, |
| "grad_norm": 3.0853965282440186, |
| "learning_rate": 4.933271289513751e-06, |
| "loss": 0.7102, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.48652482269503544, |
| "grad_norm": 2.619997501373291, |
| "learning_rate": 4.933128045084859e-06, |
| "loss": 0.6138, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.48699763593380613, |
| "grad_norm": 2.8316116333007812, |
| "learning_rate": 4.932984649155753e-06, |
| "loss": 0.6346, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.4874704491725768, |
| "grad_norm": 3.153486490249634, |
| "learning_rate": 4.932841101735361e-06, |
| "loss": 0.7626, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.4879432624113475, |
| "grad_norm": 3.1831274032592773, |
| "learning_rate": 4.9326974028326214e-06, |
| "loss": 0.6607, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.4884160756501182, |
| "grad_norm": 2.791078567504883, |
| "learning_rate": 4.932553552456481e-06, |
| "loss": 0.6141, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.4888888888888889, |
| "grad_norm": 2.627263307571411, |
| "learning_rate": 4.932409550615898e-06, |
| "loss": 0.6777, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.48936170212765956, |
| "grad_norm": 2.8550007343292236, |
| "learning_rate": 4.932265397319838e-06, |
| "loss": 0.6379, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.48983451536643025, |
| "grad_norm": 4.505824089050293, |
| "learning_rate": 4.932121092577276e-06, |
| "loss": 0.5892, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.49030732860520093, |
| "grad_norm": 3.100191116333008, |
| "learning_rate": 4.931976636397199e-06, |
| "loss": 0.6443, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.4907801418439716, |
| "grad_norm": 2.921494245529175, |
| "learning_rate": 4.9318320287886e-06, |
| "loss": 0.6821, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.4912529550827423, |
| "grad_norm": 4.577807903289795, |
| "learning_rate": 4.931687269760485e-06, |
| "loss": 0.5946, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.491725768321513, |
| "grad_norm": 2.7347636222839355, |
| "learning_rate": 4.931542359321865e-06, |
| "loss": 0.5689, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.4921985815602837, |
| "grad_norm": 2.5289158821105957, |
| "learning_rate": 4.931397297481765e-06, |
| "loss": 0.5632, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.49267139479905436, |
| "grad_norm": 3.3518471717834473, |
| "learning_rate": 4.9312520842492165e-06, |
| "loss": 0.6349, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.49314420803782505, |
| "grad_norm": 3.0469748973846436, |
| "learning_rate": 4.931106719633261e-06, |
| "loss": 0.5734, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.49361702127659574, |
| "grad_norm": 3.104682445526123, |
| "learning_rate": 4.930961203642951e-06, |
| "loss": 0.6101, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.4940898345153664, |
| "grad_norm": 2.776705503463745, |
| "learning_rate": 4.930815536287346e-06, |
| "loss": 0.6397, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.4945626477541371, |
| "grad_norm": 2.760380983352661, |
| "learning_rate": 4.930669717575516e-06, |
| "loss": 0.668, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.4950354609929078, |
| "grad_norm": 2.70084547996521, |
| "learning_rate": 4.930523747516541e-06, |
| "loss": 0.5729, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.4955082742316785, |
| "grad_norm": 2.7319583892822266, |
| "learning_rate": 4.930377626119511e-06, |
| "loss": 0.6258, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.49598108747044917, |
| "grad_norm": 3.2515223026275635, |
| "learning_rate": 4.930231353393521e-06, |
| "loss": 0.7412, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.49645390070921985, |
| "grad_norm": 3.0646486282348633, |
| "learning_rate": 4.930084929347682e-06, |
| "loss": 0.5809, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.49692671394799054, |
| "grad_norm": 3.1621921062469482, |
| "learning_rate": 4.9299383539911096e-06, |
| "loss": 0.6282, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.4973995271867612, |
| "grad_norm": 2.864713191986084, |
| "learning_rate": 4.929791627332931e-06, |
| "loss": 0.6263, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.4978723404255319, |
| "grad_norm": 3.181016683578491, |
| "learning_rate": 4.929644749382283e-06, |
| "loss": 0.5697, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.4983451536643026, |
| "grad_norm": 2.9064836502075195, |
| "learning_rate": 4.929497720148309e-06, |
| "loss": 0.6161, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.4988179669030733, |
| "grad_norm": 3.058112859725952, |
| "learning_rate": 4.9293505396401655e-06, |
| "loss": 0.6477, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.49929078014184397, |
| "grad_norm": 2.5227596759796143, |
| "learning_rate": 4.929203207867016e-06, |
| "loss": 0.5819, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.49976359338061466, |
| "grad_norm": 3.386862277984619, |
| "learning_rate": 4.929055724838035e-06, |
| "loss": 0.7342, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.5002364066193853, |
| "grad_norm": 3.368346929550171, |
| "learning_rate": 4.928908090562404e-06, |
| "loss": 0.6622, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.500709219858156, |
| "grad_norm": 2.9108314514160156, |
| "learning_rate": 4.928760305049317e-06, |
| "loss": 0.6598, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.5011820330969267, |
| "grad_norm": 2.822305917739868, |
| "learning_rate": 4.928612368307977e-06, |
| "loss": 0.5841, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.5016548463356973, |
| "grad_norm": 2.689131259918213, |
| "learning_rate": 4.928464280347592e-06, |
| "loss": 0.6631, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.502127659574468, |
| "grad_norm": 3.337214946746826, |
| "learning_rate": 4.9283160411773864e-06, |
| "loss": 0.6105, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.5026004728132387, |
| "grad_norm": 3.035911798477173, |
| "learning_rate": 4.928167650806588e-06, |
| "loss": 0.6981, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.5030732860520094, |
| "grad_norm": 2.8820855617523193, |
| "learning_rate": 4.9280191092444375e-06, |
| "loss": 0.6408, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.5035460992907801, |
| "grad_norm": 3.080432415008545, |
| "learning_rate": 4.927870416500183e-06, |
| "loss": 0.6398, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.5040189125295508, |
| "grad_norm": 2.761612892150879, |
| "learning_rate": 4.927721572583084e-06, |
| "loss": 0.6126, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.5044917257683215, |
| "grad_norm": 2.8561882972717285, |
| "learning_rate": 4.927572577502408e-06, |
| "loss": 0.584, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.5049645390070922, |
| "grad_norm": 3.3386311531066895, |
| "learning_rate": 4.927423431267432e-06, |
| "loss": 0.6666, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.5054373522458628, |
| "grad_norm": 2.632906675338745, |
| "learning_rate": 4.927274133887443e-06, |
| "loss": 0.632, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.5059101654846335, |
| "grad_norm": 2.8737308979034424, |
| "learning_rate": 4.927124685371737e-06, |
| "loss": 0.6051, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.5063829787234042, |
| "grad_norm": 3.042222738265991, |
| "learning_rate": 4.926975085729619e-06, |
| "loss": 0.6954, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.5068557919621749, |
| "grad_norm": 3.3341481685638428, |
| "learning_rate": 4.926825334970404e-06, |
| "loss": 0.7148, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.5073286052009456, |
| "grad_norm": 2.7415387630462646, |
| "learning_rate": 4.926675433103418e-06, |
| "loss": 0.5456, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.5078014184397163, |
| "grad_norm": 2.7545325756073, |
| "learning_rate": 4.926525380137993e-06, |
| "loss": 0.6213, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.508274231678487, |
| "grad_norm": 2.9153690338134766, |
| "learning_rate": 4.926375176083472e-06, |
| "loss": 0.6466, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.5087470449172576, |
| "grad_norm": 4.210638523101807, |
| "learning_rate": 4.926224820949209e-06, |
| "loss": 0.6192, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.5092198581560283, |
| "grad_norm": 2.4357898235321045, |
| "learning_rate": 4.926074314744565e-06, |
| "loss": 0.594, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.509692671394799, |
| "grad_norm": 2.8004701137542725, |
| "learning_rate": 4.92592365747891e-06, |
| "loss": 0.6276, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.5101654846335697, |
| "grad_norm": 2.920675039291382, |
| "learning_rate": 4.925772849161628e-06, |
| "loss": 0.6043, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.5106382978723404, |
| "grad_norm": 2.791555404663086, |
| "learning_rate": 4.9256218898021055e-06, |
| "loss": 0.6837, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.5111111111111111, |
| "grad_norm": 3.1702463626861572, |
| "learning_rate": 4.925470779409746e-06, |
| "loss": 0.668, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.5115839243498818, |
| "grad_norm": 2.7149479389190674, |
| "learning_rate": 4.925319517993955e-06, |
| "loss": 0.5842, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.5120567375886524, |
| "grad_norm": 2.916311025619507, |
| "learning_rate": 4.925168105564153e-06, |
| "loss": 0.6893, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.5125295508274231, |
| "grad_norm": 2.917654514312744, |
| "learning_rate": 4.925016542129767e-06, |
| "loss": 0.6513, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.5130023640661938, |
| "grad_norm": 2.5568928718566895, |
| "learning_rate": 4.924864827700234e-06, |
| "loss": 0.6177, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.5134751773049645, |
| "grad_norm": 2.816720485687256, |
| "learning_rate": 4.924712962285001e-06, |
| "loss": 0.5833, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.5139479905437352, |
| "grad_norm": 2.6989188194274902, |
| "learning_rate": 4.9245609458935235e-06, |
| "loss": 0.6332, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.5144208037825059, |
| "grad_norm": 2.959599494934082, |
| "learning_rate": 4.924408778535268e-06, |
| "loss": 0.626, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.5148936170212766, |
| "grad_norm": 2.872814416885376, |
| "learning_rate": 4.924256460219708e-06, |
| "loss": 0.6407, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.5153664302600472, |
| "grad_norm": 2.6989097595214844, |
| "learning_rate": 4.924103990956329e-06, |
| "loss": 0.6391, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.5158392434988179, |
| "grad_norm": 2.986492156982422, |
| "learning_rate": 4.9239513707546235e-06, |
| "loss": 0.6911, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.5163120567375886, |
| "grad_norm": 3.069920301437378, |
| "learning_rate": 4.9237985996240954e-06, |
| "loss": 0.671, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.5167848699763593, |
| "grad_norm": 2.8214917182922363, |
| "learning_rate": 4.9236456775742555e-06, |
| "loss": 0.5885, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.51725768321513, |
| "grad_norm": 2.9416961669921875, |
| "learning_rate": 4.923492604614627e-06, |
| "loss": 0.6293, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.5177304964539007, |
| "grad_norm": 2.761780023574829, |
| "learning_rate": 4.923339380754741e-06, |
| "loss": 0.649, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.5182033096926714, |
| "grad_norm": 2.7648792266845703, |
| "learning_rate": 4.923186006004138e-06, |
| "loss": 0.5906, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.518676122931442, |
| "grad_norm": 3.5535428524017334, |
| "learning_rate": 4.923032480372367e-06, |
| "loss": 0.7138, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.5191489361702127, |
| "grad_norm": 2.6252479553222656, |
| "learning_rate": 4.922878803868988e-06, |
| "loss": 0.5499, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.5196217494089834, |
| "grad_norm": 2.901002883911133, |
| "learning_rate": 4.9227249765035715e-06, |
| "loss": 0.6991, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.5200945626477541, |
| "grad_norm": 2.621877431869507, |
| "learning_rate": 4.9225709982856925e-06, |
| "loss": 0.6269, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5205673758865248, |
| "grad_norm": 2.872483015060425, |
| "learning_rate": 4.92241686922494e-06, |
| "loss": 0.6657, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.5210401891252955, |
| "grad_norm": 2.730447769165039, |
| "learning_rate": 4.922262589330912e-06, |
| "loss": 0.6061, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.5215130023640662, |
| "grad_norm": 2.646247386932373, |
| "learning_rate": 4.922108158613213e-06, |
| "loss": 0.5923, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.5219858156028369, |
| "grad_norm": 2.6488895416259766, |
| "learning_rate": 4.92195357708146e-06, |
| "loss": 0.6293, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.5224586288416075, |
| "grad_norm": 2.756338357925415, |
| "learning_rate": 4.921798844745278e-06, |
| "loss": 0.6374, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.5229314420803782, |
| "grad_norm": 3.1441280841827393, |
| "learning_rate": 4.921643961614301e-06, |
| "loss": 0.6652, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.5234042553191489, |
| "grad_norm": 3.050002098083496, |
| "learning_rate": 4.921488927698172e-06, |
| "loss": 0.6809, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.5238770685579196, |
| "grad_norm": 2.71750807762146, |
| "learning_rate": 4.921333743006547e-06, |
| "loss": 0.6266, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.5243498817966903, |
| "grad_norm": 2.8439245223999023, |
| "learning_rate": 4.921178407549086e-06, |
| "loss": 0.5663, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.524822695035461, |
| "grad_norm": 3.0722241401672363, |
| "learning_rate": 4.921022921335464e-06, |
| "loss": 0.6791, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.5252955082742317, |
| "grad_norm": 3.4381656646728516, |
| "learning_rate": 4.920867284375358e-06, |
| "loss": 0.6687, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.5257683215130023, |
| "grad_norm": 2.819812774658203, |
| "learning_rate": 4.920711496678463e-06, |
| "loss": 0.6299, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.526241134751773, |
| "grad_norm": 3.6587414741516113, |
| "learning_rate": 4.9205555582544765e-06, |
| "loss": 0.7392, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.5267139479905437, |
| "grad_norm": 2.774296522140503, |
| "learning_rate": 4.920399469113109e-06, |
| "loss": 0.6652, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.5271867612293144, |
| "grad_norm": 2.7480580806732178, |
| "learning_rate": 4.920243229264081e-06, |
| "loss": 0.596, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.5276595744680851, |
| "grad_norm": 3.213057518005371, |
| "learning_rate": 4.920086838717119e-06, |
| "loss": 0.6986, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.5281323877068558, |
| "grad_norm": 2.940546989440918, |
| "learning_rate": 4.919930297481962e-06, |
| "loss": 0.6481, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.5286052009456265, |
| "grad_norm": 2.5970494747161865, |
| "learning_rate": 4.9197736055683555e-06, |
| "loss": 0.5658, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.5290780141843971, |
| "grad_norm": 4.49385404586792, |
| "learning_rate": 4.919616762986057e-06, |
| "loss": 0.605, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.5295508274231678, |
| "grad_norm": 2.971857786178589, |
| "learning_rate": 4.919459769744833e-06, |
| "loss": 0.6539, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.5300236406619385, |
| "grad_norm": 2.6192965507507324, |
| "learning_rate": 4.919302625854457e-06, |
| "loss": 0.6226, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.5304964539007092, |
| "grad_norm": 2.665088176727295, |
| "learning_rate": 4.919145331324716e-06, |
| "loss": 0.6647, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.5309692671394799, |
| "grad_norm": 2.612126111984253, |
| "learning_rate": 4.918987886165403e-06, |
| "loss": 0.6965, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.5314420803782506, |
| "grad_norm": 3.80017352104187, |
| "learning_rate": 4.9188302903863205e-06, |
| "loss": 0.7396, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.5319148936170213, |
| "grad_norm": 2.781752824783325, |
| "learning_rate": 4.918672543997282e-06, |
| "loss": 0.5985, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.532387706855792, |
| "grad_norm": 2.6067914962768555, |
| "learning_rate": 4.91851464700811e-06, |
| "loss": 0.6159, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.5328605200945626, |
| "grad_norm": 2.670807123184204, |
| "learning_rate": 4.918356599428636e-06, |
| "loss": 0.5958, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 2.608611822128296, |
| "learning_rate": 4.9181984012687e-06, |
| "loss": 0.5768, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.533806146572104, |
| "grad_norm": 2.586764097213745, |
| "learning_rate": 4.918040052538154e-06, |
| "loss": 0.661, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.5342789598108747, |
| "grad_norm": 3.1317451000213623, |
| "learning_rate": 4.917881553246856e-06, |
| "loss": 0.6626, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.5347517730496454, |
| "grad_norm": 2.7135281562805176, |
| "learning_rate": 4.917722903404676e-06, |
| "loss": 0.6572, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.5352245862884161, |
| "grad_norm": 3.4546358585357666, |
| "learning_rate": 4.917564103021493e-06, |
| "loss": 0.5597, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.5356973995271868, |
| "grad_norm": 3.0943493843078613, |
| "learning_rate": 4.917405152107193e-06, |
| "loss": 0.7258, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.5361702127659574, |
| "grad_norm": 2.6069352626800537, |
| "learning_rate": 4.917246050671674e-06, |
| "loss": 0.6209, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.5366430260047281, |
| "grad_norm": 2.584883689880371, |
| "learning_rate": 4.917086798724844e-06, |
| "loss": 0.658, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.5371158392434988, |
| "grad_norm": 3.001976490020752, |
| "learning_rate": 4.9169273962766166e-06, |
| "loss": 0.6306, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.5375886524822695, |
| "grad_norm": 2.5013928413391113, |
| "learning_rate": 4.916767843336918e-06, |
| "loss": 0.572, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.5380614657210402, |
| "grad_norm": 2.9114553928375244, |
| "learning_rate": 4.916608139915684e-06, |
| "loss": 0.5841, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.5385342789598109, |
| "grad_norm": 2.8878467082977295, |
| "learning_rate": 4.9164482860228564e-06, |
| "loss": 0.6654, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.5390070921985816, |
| "grad_norm": 2.9827866554260254, |
| "learning_rate": 4.91628828166839e-06, |
| "loss": 0.6674, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.5394799054373522, |
| "grad_norm": 3.8696281909942627, |
| "learning_rate": 4.916128126862248e-06, |
| "loss": 0.6241, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.5399527186761229, |
| "grad_norm": 2.9556291103363037, |
| "learning_rate": 4.915967821614402e-06, |
| "loss": 0.6478, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.5404255319148936, |
| "grad_norm": 2.392942428588867, |
| "learning_rate": 4.915807365934834e-06, |
| "loss": 0.6097, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.5408983451536643, |
| "grad_norm": 3.032235860824585, |
| "learning_rate": 4.915646759833534e-06, |
| "loss": 0.7193, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.541371158392435, |
| "grad_norm": 2.840416193008423, |
| "learning_rate": 4.915486003320501e-06, |
| "loss": 0.5506, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.5418439716312057, |
| "grad_norm": 2.5438895225524902, |
| "learning_rate": 4.915325096405747e-06, |
| "loss": 0.6487, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.5423167848699764, |
| "grad_norm": 2.544334650039673, |
| "learning_rate": 4.9151640390992905e-06, |
| "loss": 0.6168, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.542789598108747, |
| "grad_norm": 2.8535678386688232, |
| "learning_rate": 4.91500283141116e-06, |
| "loss": 0.678, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.5432624113475177, |
| "grad_norm": 2.8086955547332764, |
| "learning_rate": 4.9148414733513915e-06, |
| "loss": 0.6473, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.5437352245862884, |
| "grad_norm": 2.4709885120391846, |
| "learning_rate": 4.914679964930034e-06, |
| "loss": 0.6797, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.5442080378250591, |
| "grad_norm": 2.8546934127807617, |
| "learning_rate": 4.9145183061571435e-06, |
| "loss": 0.6247, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.5446808510638298, |
| "grad_norm": 2.991184711456299, |
| "learning_rate": 4.9143564970427844e-06, |
| "loss": 0.5977, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.5451536643026005, |
| "grad_norm": 3.011216402053833, |
| "learning_rate": 4.914194537597033e-06, |
| "loss": 0.7005, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.5456264775413712, |
| "grad_norm": 2.807521343231201, |
| "learning_rate": 4.9140324278299744e-06, |
| "loss": 0.5412, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.5460992907801419, |
| "grad_norm": 3.0401229858398438, |
| "learning_rate": 4.913870167751701e-06, |
| "loss": 0.6394, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.5465721040189125, |
| "grad_norm": 2.853914976119995, |
| "learning_rate": 4.913707757372317e-06, |
| "loss": 0.6745, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.5470449172576832, |
| "grad_norm": 4.505620956420898, |
| "learning_rate": 4.913545196701935e-06, |
| "loss": 0.6668, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.5475177304964539, |
| "grad_norm": 3.0505781173706055, |
| "learning_rate": 4.913382485750676e-06, |
| "loss": 0.6926, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.5479905437352246, |
| "grad_norm": 2.798435688018799, |
| "learning_rate": 4.913219624528672e-06, |
| "loss": 0.605, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.5484633569739953, |
| "grad_norm": 2.7814908027648926, |
| "learning_rate": 4.913056613046065e-06, |
| "loss": 0.6678, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.548936170212766, |
| "grad_norm": 3.2089321613311768, |
| "learning_rate": 4.9128934513130025e-06, |
| "loss": 0.5995, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.5494089834515367, |
| "grad_norm": 2.7699952125549316, |
| "learning_rate": 4.9127301393396455e-06, |
| "loss": 0.7062, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.5498817966903073, |
| "grad_norm": 2.859368324279785, |
| "learning_rate": 4.912566677136162e-06, |
| "loss": 0.6063, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.550354609929078, |
| "grad_norm": 2.727334499359131, |
| "learning_rate": 4.91240306471273e-06, |
| "loss": 0.6848, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.5508274231678487, |
| "grad_norm": 2.6017510890960693, |
| "learning_rate": 4.912239302079537e-06, |
| "loss": 0.5808, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.5513002364066194, |
| "grad_norm": 3.539583206176758, |
| "learning_rate": 4.912075389246781e-06, |
| "loss": 0.7053, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.5517730496453901, |
| "grad_norm": 2.918280601501465, |
| "learning_rate": 4.911911326224666e-06, |
| "loss": 0.5904, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.5522458628841608, |
| "grad_norm": 3.0067362785339355, |
| "learning_rate": 4.9117471130234095e-06, |
| "loss": 0.6392, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.5527186761229315, |
| "grad_norm": 2.4374797344207764, |
| "learning_rate": 4.911582749653236e-06, |
| "loss": 0.5793, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.5531914893617021, |
| "grad_norm": 3.121182918548584, |
| "learning_rate": 4.911418236124378e-06, |
| "loss": 0.6636, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.5536643026004728, |
| "grad_norm": 3.1289851665496826, |
| "learning_rate": 4.91125357244708e-06, |
| "loss": 0.656, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.5541371158392435, |
| "grad_norm": 2.7034592628479004, |
| "learning_rate": 4.911088758631596e-06, |
| "loss": 0.6001, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.5546099290780142, |
| "grad_norm": 2.710146188735962, |
| "learning_rate": 4.910923794688187e-06, |
| "loss": 0.6007, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.5550827423167849, |
| "grad_norm": 2.5424487590789795, |
| "learning_rate": 4.910758680627124e-06, |
| "loss": 0.5193, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 2.615893602371216, |
| "learning_rate": 4.91059341645869e-06, |
| "loss": 0.5525, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.5560283687943263, |
| "grad_norm": 3.3179728984832764, |
| "learning_rate": 4.910428002193174e-06, |
| "loss": 0.7285, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.556501182033097, |
| "grad_norm": 2.7234175205230713, |
| "learning_rate": 4.910262437840875e-06, |
| "loss": 0.574, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.5569739952718676, |
| "grad_norm": 3.0416605472564697, |
| "learning_rate": 4.9100967234121034e-06, |
| "loss": 0.5623, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.5574468085106383, |
| "grad_norm": 3.067786455154419, |
| "learning_rate": 4.909930858917177e-06, |
| "loss": 0.6491, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.557919621749409, |
| "grad_norm": 3.0037379264831543, |
| "learning_rate": 4.909764844366422e-06, |
| "loss": 0.5696, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.5583924349881797, |
| "grad_norm": 2.966179609298706, |
| "learning_rate": 4.909598679770178e-06, |
| "loss": 0.6042, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.5588652482269504, |
| "grad_norm": 2.6000657081604004, |
| "learning_rate": 4.909432365138789e-06, |
| "loss": 0.5883, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.5593380614657211, |
| "grad_norm": 2.6794495582580566, |
| "learning_rate": 4.909265900482612e-06, |
| "loss": 0.6809, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.5598108747044918, |
| "grad_norm": 2.6765122413635254, |
| "learning_rate": 4.9090992858120115e-06, |
| "loss": 0.6601, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.5602836879432624, |
| "grad_norm": 2.6051928997039795, |
| "learning_rate": 4.908932521137363e-06, |
| "loss": 0.5946, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.5607565011820331, |
| "grad_norm": 3.0405542850494385, |
| "learning_rate": 4.908765606469048e-06, |
| "loss": 0.6998, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.5612293144208038, |
| "grad_norm": 2.7975668907165527, |
| "learning_rate": 4.908598541817462e-06, |
| "loss": 0.6218, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.5617021276595745, |
| "grad_norm": 2.5367627143859863, |
| "learning_rate": 4.908431327193005e-06, |
| "loss": 0.6354, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.5621749408983452, |
| "grad_norm": 3.7939631938934326, |
| "learning_rate": 4.908263962606091e-06, |
| "loss": 0.6376, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.5626477541371159, |
| "grad_norm": 2.864079475402832, |
| "learning_rate": 4.908096448067139e-06, |
| "loss": 0.5485, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.5631205673758866, |
| "grad_norm": 2.7855563163757324, |
| "learning_rate": 4.9079287835865804e-06, |
| "loss": 0.6645, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.5635933806146572, |
| "grad_norm": 2.6156625747680664, |
| "learning_rate": 4.9077609691748556e-06, |
| "loss": 0.5751, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.5640661938534279, |
| "grad_norm": 3.0475659370422363, |
| "learning_rate": 4.907593004842412e-06, |
| "loss": 0.6739, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.5645390070921986, |
| "grad_norm": 2.9176738262176514, |
| "learning_rate": 4.9074248905997104e-06, |
| "loss": 0.6493, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.5650118203309693, |
| "grad_norm": 2.6168384552001953, |
| "learning_rate": 4.907256626457216e-06, |
| "loss": 0.6154, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.56548463356974, |
| "grad_norm": 2.893980026245117, |
| "learning_rate": 4.907088212425408e-06, |
| "loss": 0.5808, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.5659574468085107, |
| "grad_norm": 3.3832836151123047, |
| "learning_rate": 4.90691964851477e-06, |
| "loss": 0.7888, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.5664302600472814, |
| "grad_norm": 3.088932752609253, |
| "learning_rate": 4.906750934735801e-06, |
| "loss": 0.6516, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.566903073286052, |
| "grad_norm": 2.494471549987793, |
| "learning_rate": 4.906582071099004e-06, |
| "loss": 0.6286, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.5673758865248227, |
| "grad_norm": 2.716550588607788, |
| "learning_rate": 4.906413057614895e-06, |
| "loss": 0.5939, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.5678486997635934, |
| "grad_norm": 2.5821073055267334, |
| "learning_rate": 4.906243894293995e-06, |
| "loss": 0.6668, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.5683215130023641, |
| "grad_norm": 3.651787042617798, |
| "learning_rate": 4.90607458114684e-06, |
| "loss": 0.6124, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.5687943262411348, |
| "grad_norm": 2.7567858695983887, |
| "learning_rate": 4.9059051181839705e-06, |
| "loss": 0.6656, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.5692671394799055, |
| "grad_norm": 2.8067586421966553, |
| "learning_rate": 4.90573550541594e-06, |
| "loss": 0.6306, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.5697399527186762, |
| "grad_norm": 2.6136393547058105, |
| "learning_rate": 4.905565742853307e-06, |
| "loss": 0.5992, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.5702127659574469, |
| "grad_norm": 2.899049758911133, |
| "learning_rate": 4.905395830506644e-06, |
| "loss": 0.621, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.5706855791962175, |
| "grad_norm": 3.036583185195923, |
| "learning_rate": 4.9052257683865294e-06, |
| "loss": 0.652, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.5711583924349882, |
| "grad_norm": 2.7947216033935547, |
| "learning_rate": 4.905055556503553e-06, |
| "loss": 0.6636, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.5716312056737589, |
| "grad_norm": 3.1646955013275146, |
| "learning_rate": 4.9048851948683135e-06, |
| "loss": 0.6376, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.5721040189125296, |
| "grad_norm": 2.8175766468048096, |
| "learning_rate": 4.904714683491417e-06, |
| "loss": 0.5929, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.5725768321513003, |
| "grad_norm": 2.923923969268799, |
| "learning_rate": 4.904544022383483e-06, |
| "loss": 0.6633, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.573049645390071, |
| "grad_norm": 2.7471134662628174, |
| "learning_rate": 4.9043732115551356e-06, |
| "loss": 0.6551, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.5735224586288417, |
| "grad_norm": 2.8660807609558105, |
| "learning_rate": 4.90420225101701e-06, |
| "loss": 0.6423, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.5739952718676123, |
| "grad_norm": 2.769247531890869, |
| "learning_rate": 4.904031140779754e-06, |
| "loss": 0.5982, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.574468085106383, |
| "grad_norm": 2.9043145179748535, |
| "learning_rate": 4.90385988085402e-06, |
| "loss": 0.5843, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.5749408983451537, |
| "grad_norm": 2.6639609336853027, |
| "learning_rate": 4.903688471250471e-06, |
| "loss": 0.5858, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.5754137115839244, |
| "grad_norm": 2.6967573165893555, |
| "learning_rate": 4.903516911979781e-06, |
| "loss": 0.5755, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.5758865248226951, |
| "grad_norm": 2.8865857124328613, |
| "learning_rate": 4.903345203052633e-06, |
| "loss": 0.6051, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.5763593380614658, |
| "grad_norm": 2.381979465484619, |
| "learning_rate": 4.903173344479717e-06, |
| "loss": 0.5727, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.5768321513002365, |
| "grad_norm": 2.7717981338500977, |
| "learning_rate": 4.903001336271734e-06, |
| "loss": 0.6406, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.577304964539007, |
| "grad_norm": 2.6431570053100586, |
| "learning_rate": 4.902829178439395e-06, |
| "loss": 0.6226, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.5777777777777777, |
| "grad_norm": 2.8090415000915527, |
| "learning_rate": 4.902656870993419e-06, |
| "loss": 0.5761, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.5782505910165484, |
| "grad_norm": 2.4769368171691895, |
| "learning_rate": 4.902484413944535e-06, |
| "loss": 0.5602, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.5787234042553191, |
| "grad_norm": 2.693316698074341, |
| "learning_rate": 4.902311807303481e-06, |
| "loss": 0.5222, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.5791962174940898, |
| "grad_norm": 2.7623913288116455, |
| "learning_rate": 4.902139051081004e-06, |
| "loss": 0.6978, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.5796690307328605, |
| "grad_norm": 2.6133766174316406, |
| "learning_rate": 4.901966145287863e-06, |
| "loss": 0.5802, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.5801418439716312, |
| "grad_norm": 2.7345972061157227, |
| "learning_rate": 4.901793089934821e-06, |
| "loss": 0.6294, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.5806146572104018, |
| "grad_norm": 2.7545835971832275, |
| "learning_rate": 4.9016198850326555e-06, |
| "loss": 0.6085, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.5810874704491725, |
| "grad_norm": 2.6947758197784424, |
| "learning_rate": 4.90144653059215e-06, |
| "loss": 0.6025, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.5815602836879432, |
| "grad_norm": 2.692967414855957, |
| "learning_rate": 4.901273026624099e-06, |
| "loss": 0.5715, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.5820330969267139, |
| "grad_norm": 2.78347110748291, |
| "learning_rate": 4.901099373139307e-06, |
| "loss": 0.6063, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.5825059101654846, |
| "grad_norm": 2.346496343612671, |
| "learning_rate": 4.900925570148585e-06, |
| "loss": 0.5869, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.5829787234042553, |
| "grad_norm": 2.606639862060547, |
| "learning_rate": 4.900751617662755e-06, |
| "loss": 0.6197, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.583451536643026, |
| "grad_norm": 2.5825929641723633, |
| "learning_rate": 4.900577515692649e-06, |
| "loss": 0.6721, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.5839243498817966, |
| "grad_norm": 2.731349468231201, |
| "learning_rate": 4.900403264249107e-06, |
| "loss": 0.6273, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.5843971631205673, |
| "grad_norm": 3.2133874893188477, |
| "learning_rate": 4.90022886334298e-06, |
| "loss": 0.6231, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.584869976359338, |
| "grad_norm": 2.9213852882385254, |
| "learning_rate": 4.900054312985127e-06, |
| "loss": 0.6677, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.5853427895981087, |
| "grad_norm": 2.815425157546997, |
| "learning_rate": 4.899879613186414e-06, |
| "loss": 0.6405, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.5858156028368794, |
| "grad_norm": 2.730782985687256, |
| "learning_rate": 4.899704763957721e-06, |
| "loss": 0.6233, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.5862884160756501, |
| "grad_norm": 2.6432766914367676, |
| "learning_rate": 4.899529765309936e-06, |
| "loss": 0.6267, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.5867612293144208, |
| "grad_norm": 2.616215229034424, |
| "learning_rate": 4.899354617253953e-06, |
| "loss": 0.6268, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.5872340425531914, |
| "grad_norm": 2.7630255222320557, |
| "learning_rate": 4.899179319800679e-06, |
| "loss": 0.6348, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.5877068557919621, |
| "grad_norm": 2.785095453262329, |
| "learning_rate": 4.899003872961029e-06, |
| "loss": 0.5839, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.5881796690307328, |
| "grad_norm": 2.9050328731536865, |
| "learning_rate": 4.898828276745927e-06, |
| "loss": 0.651, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.5886524822695035, |
| "grad_norm": 2.958092212677002, |
| "learning_rate": 4.8986525311663065e-06, |
| "loss": 0.6395, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.5891252955082742, |
| "grad_norm": 2.952310800552368, |
| "learning_rate": 4.898476636233111e-06, |
| "loss": 0.6731, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.5895981087470449, |
| "grad_norm": 2.9876346588134766, |
| "learning_rate": 4.898300591957293e-06, |
| "loss": 0.7015, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.5900709219858156, |
| "grad_norm": 2.8941752910614014, |
| "learning_rate": 4.898124398349813e-06, |
| "loss": 0.6452, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.5905437352245863, |
| "grad_norm": 2.9809536933898926, |
| "learning_rate": 4.897948055421642e-06, |
| "loss": 0.5736, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.5910165484633569, |
| "grad_norm": 2.927046775817871, |
| "learning_rate": 4.897771563183761e-06, |
| "loss": 0.5918, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.5914893617021276, |
| "grad_norm": 2.865020275115967, |
| "learning_rate": 4.897594921647158e-06, |
| "loss": 0.6924, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.5919621749408983, |
| "grad_norm": 2.7406699657440186, |
| "learning_rate": 4.897418130822832e-06, |
| "loss": 0.509, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.592434988179669, |
| "grad_norm": 2.781606912612915, |
| "learning_rate": 4.897241190721791e-06, |
| "loss": 0.5555, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.5929078014184397, |
| "grad_norm": 2.79209303855896, |
| "learning_rate": 4.8970641013550535e-06, |
| "loss": 0.6722, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.5933806146572104, |
| "grad_norm": 3.0672268867492676, |
| "learning_rate": 4.896886862733645e-06, |
| "loss": 0.6366, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.5938534278959811, |
| "grad_norm": 2.7456953525543213, |
| "learning_rate": 4.896709474868602e-06, |
| "loss": 0.6246, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.5943262411347517, |
| "grad_norm": 3.6731202602386475, |
| "learning_rate": 4.896531937770968e-06, |
| "loss": 0.668, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.5947990543735224, |
| "grad_norm": 2.6056087017059326, |
| "learning_rate": 4.8963542514518e-06, |
| "loss": 0.5815, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.5952718676122931, |
| "grad_norm": 2.719698905944824, |
| "learning_rate": 4.89617641592216e-06, |
| "loss": 0.6058, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.5957446808510638, |
| "grad_norm": 2.625838279724121, |
| "learning_rate": 4.895998431193121e-06, |
| "loss": 0.6143, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.5962174940898345, |
| "grad_norm": 2.7166085243225098, |
| "learning_rate": 4.895820297275767e-06, |
| "loss": 0.5187, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.5966903073286052, |
| "grad_norm": 2.7544102668762207, |
| "learning_rate": 4.8956420141811875e-06, |
| "loss": 0.5928, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.5971631205673759, |
| "grad_norm": 2.6678333282470703, |
| "learning_rate": 4.895463581920484e-06, |
| "loss": 0.611, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.5976359338061465, |
| "grad_norm": 2.853384494781494, |
| "learning_rate": 4.895285000504768e-06, |
| "loss": 0.642, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.5981087470449172, |
| "grad_norm": 2.637852430343628, |
| "learning_rate": 4.895106269945158e-06, |
| "loss": 0.6308, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.5985815602836879, |
| "grad_norm": 2.9880387783050537, |
| "learning_rate": 4.8949273902527826e-06, |
| "loss": 0.5781, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.5990543735224586, |
| "grad_norm": 3.5984015464782715, |
| "learning_rate": 4.89474836143878e-06, |
| "loss": 0.5865, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.5995271867612293, |
| "grad_norm": 2.719855546951294, |
| "learning_rate": 4.8945691835142975e-06, |
| "loss": 0.6393, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 2.7885141372680664, |
| "learning_rate": 4.894389856490492e-06, |
| "loss": 0.66, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.6004728132387707, |
| "grad_norm": 2.698819875717163, |
| "learning_rate": 4.894210380378529e-06, |
| "loss": 0.6144, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.6009456264775414, |
| "grad_norm": 2.278045654296875, |
| "learning_rate": 4.894030755189584e-06, |
| "loss": 0.5609, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.601418439716312, |
| "grad_norm": 2.8729357719421387, |
| "learning_rate": 4.893850980934841e-06, |
| "loss": 0.6715, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.6018912529550827, |
| "grad_norm": 2.8541221618652344, |
| "learning_rate": 4.893671057625495e-06, |
| "loss": 0.6787, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.6023640661938534, |
| "grad_norm": 2.4561476707458496, |
| "learning_rate": 4.893490985272748e-06, |
| "loss": 0.6331, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.6028368794326241, |
| "grad_norm": 2.565739154815674, |
| "learning_rate": 4.893310763887812e-06, |
| "loss": 0.587, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.6033096926713948, |
| "grad_norm": 2.384951591491699, |
| "learning_rate": 4.8931303934819095e-06, |
| "loss": 0.5358, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.6037825059101655, |
| "grad_norm": 2.380808115005493, |
| "learning_rate": 4.89294987406627e-06, |
| "loss": 0.5402, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.6042553191489362, |
| "grad_norm": 2.764815092086792, |
| "learning_rate": 4.892769205652136e-06, |
| "loss": 0.6103, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.6047281323877068, |
| "grad_norm": 2.463350296020508, |
| "learning_rate": 4.892588388250754e-06, |
| "loss": 0.5937, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.6052009456264775, |
| "grad_norm": 3.099689245223999, |
| "learning_rate": 4.8924074218733855e-06, |
| "loss": 0.6354, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.6056737588652482, |
| "grad_norm": 2.804450035095215, |
| "learning_rate": 4.892226306531297e-06, |
| "loss": 0.6595, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.6061465721040189, |
| "grad_norm": 3.1559767723083496, |
| "learning_rate": 4.892045042235765e-06, |
| "loss": 0.6664, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.6066193853427896, |
| "grad_norm": 2.844341993331909, |
| "learning_rate": 4.891863628998079e-06, |
| "loss": 0.7454, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.6070921985815603, |
| "grad_norm": 2.686602830886841, |
| "learning_rate": 4.891682066829532e-06, |
| "loss": 0.6755, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.607565011820331, |
| "grad_norm": 2.736457347869873, |
| "learning_rate": 4.8915003557414285e-06, |
| "loss": 0.6305, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.6080378250591016, |
| "grad_norm": 2.661362409591675, |
| "learning_rate": 4.891318495745086e-06, |
| "loss": 0.5958, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.6085106382978723, |
| "grad_norm": 2.707348108291626, |
| "learning_rate": 4.8911364868518255e-06, |
| "loss": 0.5824, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.608983451536643, |
| "grad_norm": 2.9798858165740967, |
| "learning_rate": 4.890954329072981e-06, |
| "loss": 0.5981, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.6094562647754137, |
| "grad_norm": 2.6285455226898193, |
| "learning_rate": 4.890772022419895e-06, |
| "loss": 0.6194, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.6099290780141844, |
| "grad_norm": 2.9254322052001953, |
| "learning_rate": 4.890589566903917e-06, |
| "loss": 0.6002, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.6104018912529551, |
| "grad_norm": 2.6458325386047363, |
| "learning_rate": 4.89040696253641e-06, |
| "loss": 0.5457, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.6108747044917258, |
| "grad_norm": 2.508242607116699, |
| "learning_rate": 4.890224209328743e-06, |
| "loss": 0.6168, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.6113475177304964, |
| "grad_norm": 3.034785509109497, |
| "learning_rate": 4.890041307292296e-06, |
| "loss": 0.664, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.6118203309692671, |
| "grad_norm": 3.52469539642334, |
| "learning_rate": 4.889858256438455e-06, |
| "loss": 0.7301, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.6122931442080378, |
| "grad_norm": 2.9145348072052, |
| "learning_rate": 4.889675056778622e-06, |
| "loss": 0.6494, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.6127659574468085, |
| "grad_norm": 2.831829071044922, |
| "learning_rate": 4.8894917083242e-06, |
| "loss": 0.6064, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.6132387706855792, |
| "grad_norm": 2.6883130073547363, |
| "learning_rate": 4.889308211086608e-06, |
| "loss": 0.5642, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.6137115839243499, |
| "grad_norm": 3.0605485439300537, |
| "learning_rate": 4.889124565077269e-06, |
| "loss": 0.6695, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.6141843971631206, |
| "grad_norm": 3.44062876701355, |
| "learning_rate": 4.88894077030762e-06, |
| "loss": 0.6415, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.6146572104018913, |
| "grad_norm": 2.5970818996429443, |
| "learning_rate": 4.888756826789105e-06, |
| "loss": 0.6518, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.6151300236406619, |
| "grad_norm": 4.2233567237854, |
| "learning_rate": 4.8885727345331755e-06, |
| "loss": 0.6555, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.6156028368794326, |
| "grad_norm": 2.645385503768921, |
| "learning_rate": 4.888388493551297e-06, |
| "loss": 0.6762, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.6160756501182033, |
| "grad_norm": 2.907954454421997, |
| "learning_rate": 4.8882041038549385e-06, |
| "loss": 0.6526, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.616548463356974, |
| "grad_norm": 2.482771873474121, |
| "learning_rate": 4.888019565455583e-06, |
| "loss": 0.628, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.6170212765957447, |
| "grad_norm": 2.7165915966033936, |
| "learning_rate": 4.88783487836472e-06, |
| "loss": 0.5743, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.6174940898345154, |
| "grad_norm": 3.095627546310425, |
| "learning_rate": 4.88765004259385e-06, |
| "loss": 0.627, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.6179669030732861, |
| "grad_norm": 2.5018465518951416, |
| "learning_rate": 4.8874650581544805e-06, |
| "loss": 0.5215, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.6184397163120567, |
| "grad_norm": 3.094337224960327, |
| "learning_rate": 4.8872799250581316e-06, |
| "loss": 0.6979, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.6189125295508274, |
| "grad_norm": 3.1002209186553955, |
| "learning_rate": 4.887094643316329e-06, |
| "loss": 0.6565, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.6193853427895981, |
| "grad_norm": 2.551431894302368, |
| "learning_rate": 4.88690921294061e-06, |
| "loss": 0.5748, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.6198581560283688, |
| "grad_norm": 2.8282904624938965, |
| "learning_rate": 4.886723633942521e-06, |
| "loss": 0.676, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.6203309692671395, |
| "grad_norm": 2.8887810707092285, |
| "learning_rate": 4.886537906333617e-06, |
| "loss": 0.5971, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.6208037825059102, |
| "grad_norm": 2.9989118576049805, |
| "learning_rate": 4.886352030125462e-06, |
| "loss": 0.6341, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.6212765957446809, |
| "grad_norm": 2.8042776584625244, |
| "learning_rate": 4.886166005329629e-06, |
| "loss": 0.6578, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.6217494089834515, |
| "grad_norm": 2.4980967044830322, |
| "learning_rate": 4.8859798319577026e-06, |
| "loss": 0.6711, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.6222222222222222, |
| "grad_norm": 2.762369155883789, |
| "learning_rate": 4.885793510021274e-06, |
| "loss": 0.5747, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.6226950354609929, |
| "grad_norm": 3.136327028274536, |
| "learning_rate": 4.885607039531945e-06, |
| "loss": 0.7544, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.6231678486997636, |
| "grad_norm": 2.8736963272094727, |
| "learning_rate": 4.885420420501327e-06, |
| "loss": 0.6603, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.6236406619385343, |
| "grad_norm": 2.766237497329712, |
| "learning_rate": 4.885233652941039e-06, |
| "loss": 0.581, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.624113475177305, |
| "grad_norm": 2.4740939140319824, |
| "learning_rate": 4.88504673686271e-06, |
| "loss": 0.6335, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.6245862884160757, |
| "grad_norm": 3.324795961380005, |
| "learning_rate": 4.884859672277978e-06, |
| "loss": 0.6019, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.6250591016548463, |
| "grad_norm": 3.521327257156372, |
| "learning_rate": 4.884672459198493e-06, |
| "loss": 0.6104, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.625531914893617, |
| "grad_norm": 2.7728071212768555, |
| "learning_rate": 4.884485097635909e-06, |
| "loss": 0.6714, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.6260047281323877, |
| "grad_norm": 3.0738155841827393, |
| "learning_rate": 4.884297587601895e-06, |
| "loss": 0.604, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.6264775413711584, |
| "grad_norm": 2.719240427017212, |
| "learning_rate": 4.884109929108124e-06, |
| "loss": 0.6795, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.6269503546099291, |
| "grad_norm": 2.4108200073242188, |
| "learning_rate": 4.883922122166282e-06, |
| "loss": 0.5846, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.6274231678486998, |
| "grad_norm": 2.393899917602539, |
| "learning_rate": 4.883734166788063e-06, |
| "loss": 0.6188, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.6278959810874705, |
| "grad_norm": 4.555255889892578, |
| "learning_rate": 4.883546062985169e-06, |
| "loss": 0.5962, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.6283687943262412, |
| "grad_norm": 2.571075439453125, |
| "learning_rate": 4.883357810769315e-06, |
| "loss": 0.6165, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.6288416075650118, |
| "grad_norm": 2.553115129470825, |
| "learning_rate": 4.8831694101522185e-06, |
| "loss": 0.6787, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.6293144208037825, |
| "grad_norm": 3.2564642429351807, |
| "learning_rate": 4.882980861145614e-06, |
| "loss": 0.659, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.6297872340425532, |
| "grad_norm": 2.535216808319092, |
| "learning_rate": 4.882792163761241e-06, |
| "loss": 0.6176, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.6302600472813239, |
| "grad_norm": 3.097921848297119, |
| "learning_rate": 4.882603318010847e-06, |
| "loss": 0.6822, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.6307328605200946, |
| "grad_norm": 2.8135175704956055, |
| "learning_rate": 4.882414323906192e-06, |
| "loss": 0.6782, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.6312056737588653, |
| "grad_norm": 2.724634885787964, |
| "learning_rate": 4.882225181459044e-06, |
| "loss": 0.6545, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.631678486997636, |
| "grad_norm": 2.9585227966308594, |
| "learning_rate": 4.882035890681179e-06, |
| "loss": 0.6218, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.6321513002364066, |
| "grad_norm": 2.6952011585235596, |
| "learning_rate": 4.881846451584385e-06, |
| "loss": 0.6, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.6326241134751773, |
| "grad_norm": 3.1400704383850098, |
| "learning_rate": 4.881656864180455e-06, |
| "loss": 0.6687, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.633096926713948, |
| "grad_norm": 2.8382487297058105, |
| "learning_rate": 4.881467128481197e-06, |
| "loss": 0.574, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.6335697399527187, |
| "grad_norm": 2.8520095348358154, |
| "learning_rate": 4.881277244498422e-06, |
| "loss": 0.6582, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.6340425531914894, |
| "grad_norm": 2.703498363494873, |
| "learning_rate": 4.881087212243956e-06, |
| "loss": 0.7224, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.6345153664302601, |
| "grad_norm": 3.697205066680908, |
| "learning_rate": 4.880897031729629e-06, |
| "loss": 0.6582, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.6349881796690308, |
| "grad_norm": 2.7625808715820312, |
| "learning_rate": 4.880706702967284e-06, |
| "loss": 0.574, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.6354609929078014, |
| "grad_norm": 2.949984073638916, |
| "learning_rate": 4.880516225968771e-06, |
| "loss": 0.66, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.6359338061465721, |
| "grad_norm": 2.548269748687744, |
| "learning_rate": 4.8803256007459525e-06, |
| "loss": 0.642, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.6364066193853428, |
| "grad_norm": 2.5102174282073975, |
| "learning_rate": 4.8801348273106945e-06, |
| "loss": 0.6238, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.6368794326241135, |
| "grad_norm": 2.9847946166992188, |
| "learning_rate": 4.8799439056748786e-06, |
| "loss": 0.5416, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.6373522458628842, |
| "grad_norm": 2.8711049556732178, |
| "learning_rate": 4.879752835850391e-06, |
| "loss": 0.6427, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.6378250591016549, |
| "grad_norm": 2.7901716232299805, |
| "learning_rate": 4.879561617849129e-06, |
| "loss": 0.6026, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.6382978723404256, |
| "grad_norm": 2.659778356552124, |
| "learning_rate": 4.879370251682999e-06, |
| "loss": 0.6623, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.6387706855791963, |
| "grad_norm": 3.224386692047119, |
| "learning_rate": 4.879178737363917e-06, |
| "loss": 0.6485, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.6392434988179669, |
| "grad_norm": 2.6385605335235596, |
| "learning_rate": 4.8789870749038076e-06, |
| "loss": 0.5866, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.6397163120567376, |
| "grad_norm": 2.807713270187378, |
| "learning_rate": 4.8787952643146045e-06, |
| "loss": 0.6537, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.6401891252955083, |
| "grad_norm": 2.5689280033111572, |
| "learning_rate": 4.878603305608251e-06, |
| "loss": 0.6216, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.640661938534279, |
| "grad_norm": 2.7347843647003174, |
| "learning_rate": 4.8784111987967e-06, |
| "loss": 0.6318, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.6411347517730497, |
| "grad_norm": 2.5210378170013428, |
| "learning_rate": 4.878218943891911e-06, |
| "loss": 0.5472, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.6416075650118204, |
| "grad_norm": 2.866785764694214, |
| "learning_rate": 4.878026540905858e-06, |
| "loss": 0.7108, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.642080378250591, |
| "grad_norm": 2.923314332962036, |
| "learning_rate": 4.877833989850519e-06, |
| "loss": 0.5557, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.6425531914893617, |
| "grad_norm": 2.925463914871216, |
| "learning_rate": 4.8776412907378845e-06, |
| "loss": 0.6382, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.6430260047281324, |
| "grad_norm": 2.909644365310669, |
| "learning_rate": 4.877448443579952e-06, |
| "loss": 0.5603, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.6434988179669031, |
| "grad_norm": 3.501148223876953, |
| "learning_rate": 4.8772554483887306e-06, |
| "loss": 0.6722, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.6439716312056738, |
| "grad_norm": 2.823765516281128, |
| "learning_rate": 4.877062305176235e-06, |
| "loss": 0.6408, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.6444444444444445, |
| "grad_norm": 2.9807584285736084, |
| "learning_rate": 4.8768690139544935e-06, |
| "loss": 0.5984, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.6449172576832152, |
| "grad_norm": 2.8411378860473633, |
| "learning_rate": 4.8766755747355405e-06, |
| "loss": 0.6231, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.6453900709219859, |
| "grad_norm": 3.158952236175537, |
| "learning_rate": 4.8764819875314215e-06, |
| "loss": 0.6441, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.6458628841607565, |
| "grad_norm": 2.9614369869232178, |
| "learning_rate": 4.876288252354189e-06, |
| "loss": 0.6308, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.6463356973995272, |
| "grad_norm": 3.073805570602417, |
| "learning_rate": 4.876094369215907e-06, |
| "loss": 0.6046, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.6468085106382979, |
| "grad_norm": 2.719189405441284, |
| "learning_rate": 4.875900338128648e-06, |
| "loss": 0.6082, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.6472813238770686, |
| "grad_norm": 2.676726818084717, |
| "learning_rate": 4.8757061591044914e-06, |
| "loss": 0.6344, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.6477541371158393, |
| "grad_norm": 2.955256938934326, |
| "learning_rate": 4.87551183215553e-06, |
| "loss": 0.6506, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.64822695035461, |
| "grad_norm": 2.5672218799591064, |
| "learning_rate": 4.875317357293864e-06, |
| "loss": 0.5284, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.6486997635933807, |
| "grad_norm": 2.5860238075256348, |
| "learning_rate": 4.875122734531602e-06, |
| "loss": 0.667, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.6491725768321513, |
| "grad_norm": 3.1037003993988037, |
| "learning_rate": 4.8749279638808605e-06, |
| "loss": 0.6902, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.649645390070922, |
| "grad_norm": 2.7715282440185547, |
| "learning_rate": 4.874733045353769e-06, |
| "loss": 0.6291, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.6501182033096927, |
| "grad_norm": 2.527071475982666, |
| "learning_rate": 4.874537978962463e-06, |
| "loss": 0.5565, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.6505910165484634, |
| "grad_norm": 2.722092628479004, |
| "learning_rate": 4.874342764719091e-06, |
| "loss": 0.5724, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.6510638297872341, |
| "grad_norm": 2.6342411041259766, |
| "learning_rate": 4.874147402635805e-06, |
| "loss": 0.6308, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.6515366430260048, |
| "grad_norm": 2.3850719928741455, |
| "learning_rate": 4.8739518927247695e-06, |
| "loss": 0.5692, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.6520094562647755, |
| "grad_norm": 2.9787259101867676, |
| "learning_rate": 4.873756234998161e-06, |
| "loss": 0.6953, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.6524822695035462, |
| "grad_norm": 2.634141683578491, |
| "learning_rate": 4.873560429468159e-06, |
| "loss": 0.6077, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.6529550827423168, |
| "grad_norm": 2.803046941757202, |
| "learning_rate": 4.873364476146958e-06, |
| "loss": 0.6657, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.6534278959810875, |
| "grad_norm": 2.762827157974243, |
| "learning_rate": 4.8731683750467574e-06, |
| "loss": 0.6061, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.6539007092198581, |
| "grad_norm": 2.6654391288757324, |
| "learning_rate": 4.872972126179768e-06, |
| "loss": 0.6387, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.6543735224586288, |
| "grad_norm": 2.4363625049591064, |
| "learning_rate": 4.872775729558209e-06, |
| "loss": 0.5623, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.6548463356973995, |
| "grad_norm": 2.528959035873413, |
| "learning_rate": 4.87257918519431e-06, |
| "loss": 0.5609, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.6553191489361702, |
| "grad_norm": 2.718383312225342, |
| "learning_rate": 4.872382493100309e-06, |
| "loss": 0.5575, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.6557919621749408, |
| "grad_norm": 2.660841226577759, |
| "learning_rate": 4.872185653288453e-06, |
| "loss": 0.6106, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.6562647754137115, |
| "grad_norm": 2.508753538131714, |
| "learning_rate": 4.871988665770997e-06, |
| "loss": 0.5705, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.6567375886524822, |
| "grad_norm": 2.5134334564208984, |
| "learning_rate": 4.871791530560208e-06, |
| "loss": 0.5592, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.6572104018912529, |
| "grad_norm": 2.7475597858428955, |
| "learning_rate": 4.871594247668361e-06, |
| "loss": 0.6277, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.6576832151300236, |
| "grad_norm": 2.793616533279419, |
| "learning_rate": 4.871396817107739e-06, |
| "loss": 0.595, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.6581560283687943, |
| "grad_norm": 2.8285086154937744, |
| "learning_rate": 4.871199238890635e-06, |
| "loss": 0.6094, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.658628841607565, |
| "grad_norm": 2.74124813079834, |
| "learning_rate": 4.871001513029352e-06, |
| "loss": 0.6296, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.6591016548463356, |
| "grad_norm": 2.761237621307373, |
| "learning_rate": 4.870803639536202e-06, |
| "loss": 0.5702, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.6595744680851063, |
| "grad_norm": 2.761038064956665, |
| "learning_rate": 4.870605618423504e-06, |
| "loss": 0.6195, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.660047281323877, |
| "grad_norm": 2.8812482357025146, |
| "learning_rate": 4.870407449703589e-06, |
| "loss": 0.616, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.6605200945626477, |
| "grad_norm": 2.9966578483581543, |
| "learning_rate": 4.870209133388797e-06, |
| "loss": 0.6547, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.6609929078014184, |
| "grad_norm": 2.7969017028808594, |
| "learning_rate": 4.870010669491474e-06, |
| "loss": 0.5762, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.6614657210401891, |
| "grad_norm": 2.557783842086792, |
| "learning_rate": 4.86981205802398e-06, |
| "loss": 0.6184, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.6619385342789598, |
| "grad_norm": 2.5393927097320557, |
| "learning_rate": 4.86961329899868e-06, |
| "loss": 0.5953, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.6624113475177305, |
| "grad_norm": 2.7745981216430664, |
| "learning_rate": 4.86941439242795e-06, |
| "loss": 0.5967, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.6628841607565011, |
| "grad_norm": 2.650381326675415, |
| "learning_rate": 4.869215338324176e-06, |
| "loss": 0.5667, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.6633569739952718, |
| "grad_norm": 2.583169937133789, |
| "learning_rate": 4.869016136699751e-06, |
| "loss": 0.549, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.6638297872340425, |
| "grad_norm": 2.984978437423706, |
| "learning_rate": 4.868816787567079e-06, |
| "loss": 0.5931, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.6643026004728132, |
| "grad_norm": 3.1947181224823, |
| "learning_rate": 4.868617290938573e-06, |
| "loss": 0.5473, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.6647754137115839, |
| "grad_norm": 2.562927007675171, |
| "learning_rate": 4.868417646826654e-06, |
| "loss": 0.6878, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.6652482269503546, |
| "grad_norm": 2.8741261959075928, |
| "learning_rate": 4.868217855243754e-06, |
| "loss": 0.6312, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.6657210401891253, |
| "grad_norm": 2.9834797382354736, |
| "learning_rate": 4.868017916202312e-06, |
| "loss": 0.5624, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.6661938534278959, |
| "grad_norm": 2.6935982704162598, |
| "learning_rate": 4.8678178297147785e-06, |
| "loss": 0.5857, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 2.8200576305389404, |
| "learning_rate": 4.86761759579361e-06, |
| "loss": 0.6153, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.6671394799054373, |
| "grad_norm": 2.831425189971924, |
| "learning_rate": 4.867417214451276e-06, |
| "loss": 0.6495, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.667612293144208, |
| "grad_norm": 2.733565092086792, |
| "learning_rate": 4.867216685700253e-06, |
| "loss": 0.6036, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.6680851063829787, |
| "grad_norm": 3.0609400272369385, |
| "learning_rate": 4.867016009553027e-06, |
| "loss": 0.6773, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.6685579196217494, |
| "grad_norm": 2.665452241897583, |
| "learning_rate": 4.866815186022093e-06, |
| "loss": 0.6256, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.6690307328605201, |
| "grad_norm": 2.9480721950531006, |
| "learning_rate": 4.866614215119956e-06, |
| "loss": 0.535, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.6695035460992907, |
| "grad_norm": 2.5514180660247803, |
| "learning_rate": 4.866413096859128e-06, |
| "loss": 0.6588, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.6699763593380614, |
| "grad_norm": 3.3442373275756836, |
| "learning_rate": 4.866211831252134e-06, |
| "loss": 0.5754, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.6704491725768321, |
| "grad_norm": 2.521467685699463, |
| "learning_rate": 4.866010418311504e-06, |
| "loss": 0.5546, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.6709219858156028, |
| "grad_norm": 2.930706262588501, |
| "learning_rate": 4.865808858049781e-06, |
| "loss": 0.589, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.6713947990543735, |
| "grad_norm": 2.6298375129699707, |
| "learning_rate": 4.865607150479513e-06, |
| "loss": 0.5915, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.6718676122931442, |
| "grad_norm": 2.9554293155670166, |
| "learning_rate": 4.8654052956132615e-06, |
| "loss": 0.6654, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.6723404255319149, |
| "grad_norm": 3.2706902027130127, |
| "learning_rate": 4.865203293463593e-06, |
| "loss": 0.7115, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.6728132387706856, |
| "grad_norm": 3.041539430618286, |
| "learning_rate": 4.865001144043088e-06, |
| "loss": 0.5818, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.6732860520094562, |
| "grad_norm": 3.1314544677734375, |
| "learning_rate": 4.864798847364331e-06, |
| "loss": 0.5822, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.6737588652482269, |
| "grad_norm": 2.5301461219787598, |
| "learning_rate": 4.86459640343992e-06, |
| "loss": 0.5525, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.6742316784869976, |
| "grad_norm": 2.809295892715454, |
| "learning_rate": 4.864393812282458e-06, |
| "loss": 0.6768, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.6747044917257683, |
| "grad_norm": 2.794664144515991, |
| "learning_rate": 4.864191073904562e-06, |
| "loss": 0.5793, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.675177304964539, |
| "grad_norm": 2.7771105766296387, |
| "learning_rate": 4.863988188318854e-06, |
| "loss": 0.6453, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.6756501182033097, |
| "grad_norm": 2.6431946754455566, |
| "learning_rate": 4.863785155537967e-06, |
| "loss": 0.5877, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.6761229314420804, |
| "grad_norm": 2.951353073120117, |
| "learning_rate": 4.863581975574544e-06, |
| "loss": 0.6793, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.676595744680851, |
| "grad_norm": 3.1336071491241455, |
| "learning_rate": 4.863378648441235e-06, |
| "loss": 0.6695, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.6770685579196217, |
| "grad_norm": 2.735982656478882, |
| "learning_rate": 4.8631751741507e-06, |
| "loss": 0.5239, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.6775413711583924, |
| "grad_norm": 2.7085206508636475, |
| "learning_rate": 4.862971552715611e-06, |
| "loss": 0.6837, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.6780141843971631, |
| "grad_norm": 3.136528730392456, |
| "learning_rate": 4.8627677841486436e-06, |
| "loss": 0.683, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.6784869976359338, |
| "grad_norm": 2.7879369258880615, |
| "learning_rate": 4.862563868462486e-06, |
| "loss": 0.608, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.6789598108747045, |
| "grad_norm": 2.7937729358673096, |
| "learning_rate": 4.862359805669837e-06, |
| "loss": 0.6131, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.6794326241134752, |
| "grad_norm": 2.5988364219665527, |
| "learning_rate": 4.862155595783401e-06, |
| "loss": 0.6303, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.6799054373522458, |
| "grad_norm": 3.251070499420166, |
| "learning_rate": 4.861951238815894e-06, |
| "loss": 0.7246, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.6803782505910165, |
| "grad_norm": 2.646759271621704, |
| "learning_rate": 4.861746734780039e-06, |
| "loss": 0.6313, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.6808510638297872, |
| "grad_norm": 2.773866891860962, |
| "learning_rate": 4.861542083688573e-06, |
| "loss": 0.6463, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.6813238770685579, |
| "grad_norm": 2.759965658187866, |
| "learning_rate": 4.861337285554235e-06, |
| "loss": 0.5428, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.6817966903073286, |
| "grad_norm": 3.3250818252563477, |
| "learning_rate": 4.861132340389779e-06, |
| "loss": 0.6522, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.6822695035460993, |
| "grad_norm": 2.661797523498535, |
| "learning_rate": 4.860927248207965e-06, |
| "loss": 0.5871, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.68274231678487, |
| "grad_norm": 2.706289052963257, |
| "learning_rate": 4.860722009021563e-06, |
| "loss": 0.6651, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.6832151300236406, |
| "grad_norm": 2.8459298610687256, |
| "learning_rate": 4.860516622843354e-06, |
| "loss": 0.5827, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.6836879432624113, |
| "grad_norm": 3.1041831970214844, |
| "learning_rate": 4.860311089686125e-06, |
| "loss": 0.6727, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.684160756501182, |
| "grad_norm": 2.9382801055908203, |
| "learning_rate": 4.8601054095626746e-06, |
| "loss": 0.6002, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.6846335697399527, |
| "grad_norm": 2.782475471496582, |
| "learning_rate": 4.859899582485808e-06, |
| "loss": 0.6951, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.6851063829787234, |
| "grad_norm": 3.313894510269165, |
| "learning_rate": 4.859693608468343e-06, |
| "loss": 0.6363, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.6855791962174941, |
| "grad_norm": 3.1639695167541504, |
| "learning_rate": 4.8594874875231045e-06, |
| "loss": 0.7002, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.6860520094562648, |
| "grad_norm": 2.6762218475341797, |
| "learning_rate": 4.859281219662926e-06, |
| "loss": 0.6246, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.6865248226950355, |
| "grad_norm": 2.8368663787841797, |
| "learning_rate": 4.85907480490065e-06, |
| "loss": 0.5906, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.6869976359338061, |
| "grad_norm": 2.887373208999634, |
| "learning_rate": 4.858868243249131e-06, |
| "loss": 0.5931, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.6874704491725768, |
| "grad_norm": 2.8115322589874268, |
| "learning_rate": 4.858661534721229e-06, |
| "loss": 0.6337, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.6879432624113475, |
| "grad_norm": 2.8470499515533447, |
| "learning_rate": 4.8584546793298174e-06, |
| "loss": 0.632, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.6884160756501182, |
| "grad_norm": 2.8229613304138184, |
| "learning_rate": 4.8582476770877725e-06, |
| "loss": 0.6494, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.6888888888888889, |
| "grad_norm": 2.4235479831695557, |
| "learning_rate": 4.858040528007987e-06, |
| "loss": 0.5709, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.6893617021276596, |
| "grad_norm": 2.9348199367523193, |
| "learning_rate": 4.857833232103356e-06, |
| "loss": 0.5404, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.6898345153664303, |
| "grad_norm": 2.8274219036102295, |
| "learning_rate": 4.857625789386789e-06, |
| "loss": 0.701, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.6903073286052009, |
| "grad_norm": 3.136929988861084, |
| "learning_rate": 4.857418199871203e-06, |
| "loss": 0.6971, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.6907801418439716, |
| "grad_norm": 2.8987185955047607, |
| "learning_rate": 4.8572104635695214e-06, |
| "loss": 0.6613, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.6912529550827423, |
| "grad_norm": 2.5073442459106445, |
| "learning_rate": 4.857002580494681e-06, |
| "loss": 0.6032, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.691725768321513, |
| "grad_norm": 2.7019522190093994, |
| "learning_rate": 4.856794550659625e-06, |
| "loss": 0.567, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.6921985815602837, |
| "grad_norm": 2.4795594215393066, |
| "learning_rate": 4.8565863740773054e-06, |
| "loss": 0.5777, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.6926713947990544, |
| "grad_norm": 3.032506227493286, |
| "learning_rate": 4.856378050760687e-06, |
| "loss": 0.607, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.6931442080378251, |
| "grad_norm": 3.052091121673584, |
| "learning_rate": 4.85616958072274e-06, |
| "loss": 0.591, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.6936170212765957, |
| "grad_norm": 2.704831838607788, |
| "learning_rate": 4.855960963976443e-06, |
| "loss": 0.6528, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.6940898345153664, |
| "grad_norm": 2.680995225906372, |
| "learning_rate": 4.855752200534788e-06, |
| "loss": 0.6294, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.6945626477541371, |
| "grad_norm": 2.3948659896850586, |
| "learning_rate": 4.855543290410774e-06, |
| "loss": 0.6091, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.6950354609929078, |
| "grad_norm": 2.6407411098480225, |
| "learning_rate": 4.855334233617407e-06, |
| "loss": 0.5572, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.6955082742316785, |
| "grad_norm": 2.5526835918426514, |
| "learning_rate": 4.8551250301677064e-06, |
| "loss": 0.5432, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.6959810874704492, |
| "grad_norm": 3.1237430572509766, |
| "learning_rate": 4.8549156800746965e-06, |
| "loss": 0.5944, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.6964539007092199, |
| "grad_norm": 2.8112540245056152, |
| "learning_rate": 4.854706183351412e-06, |
| "loss": 0.604, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.6969267139479906, |
| "grad_norm": 2.664644479751587, |
| "learning_rate": 4.8544965400109e-06, |
| "loss": 0.5647, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.6973995271867612, |
| "grad_norm": 3.26310133934021, |
| "learning_rate": 4.854286750066212e-06, |
| "loss": 0.6999, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.6978723404255319, |
| "grad_norm": 2.9717442989349365, |
| "learning_rate": 4.8540768135304115e-06, |
| "loss": 0.6655, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.6983451536643026, |
| "grad_norm": 2.5302982330322266, |
| "learning_rate": 4.85386673041657e-06, |
| "loss": 0.6384, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.6988179669030733, |
| "grad_norm": 2.864877700805664, |
| "learning_rate": 4.853656500737769e-06, |
| "loss": 0.6834, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.699290780141844, |
| "grad_norm": 2.5522031784057617, |
| "learning_rate": 4.853446124507098e-06, |
| "loss": 0.5929, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.6997635933806147, |
| "grad_norm": 3.096477746963501, |
| "learning_rate": 4.853235601737656e-06, |
| "loss": 0.5737, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.7002364066193854, |
| "grad_norm": 2.884779214859009, |
| "learning_rate": 4.853024932442552e-06, |
| "loss": 0.6362, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.700709219858156, |
| "grad_norm": 3.368558406829834, |
| "learning_rate": 4.852814116634903e-06, |
| "loss": 0.6721, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.7011820330969267, |
| "grad_norm": 2.742414951324463, |
| "learning_rate": 4.852603154327837e-06, |
| "loss": 0.6212, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.7016548463356974, |
| "grad_norm": 2.53454852104187, |
| "learning_rate": 4.8523920455344864e-06, |
| "loss": 0.6675, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.7021276595744681, |
| "grad_norm": 2.9354238510131836, |
| "learning_rate": 4.852180790267999e-06, |
| "loss": 0.6692, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.7026004728132388, |
| "grad_norm": 2.585070848464966, |
| "learning_rate": 4.8519693885415274e-06, |
| "loss": 0.6215, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.7030732860520095, |
| "grad_norm": 2.9047999382019043, |
| "learning_rate": 4.851757840368235e-06, |
| "loss": 0.6231, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.7035460992907802, |
| "grad_norm": 3.0930933952331543, |
| "learning_rate": 4.851546145761295e-06, |
| "loss": 0.7267, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.7040189125295508, |
| "grad_norm": 3.0224719047546387, |
| "learning_rate": 4.8513343047338875e-06, |
| "loss": 0.6293, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.7044917257683215, |
| "grad_norm": 2.5758471488952637, |
| "learning_rate": 4.851122317299203e-06, |
| "loss": 0.5855, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.7049645390070922, |
| "grad_norm": 2.579272508621216, |
| "learning_rate": 4.850910183470441e-06, |
| "loss": 0.582, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.7054373522458629, |
| "grad_norm": 2.8148300647735596, |
| "learning_rate": 4.85069790326081e-06, |
| "loss": 0.6396, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.7059101654846336, |
| "grad_norm": 2.6380527019500732, |
| "learning_rate": 4.850485476683528e-06, |
| "loss": 0.6114, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.7063829787234043, |
| "grad_norm": 2.7736263275146484, |
| "learning_rate": 4.850272903751823e-06, |
| "loss": 0.6683, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.706855791962175, |
| "grad_norm": 3.1958179473876953, |
| "learning_rate": 4.8500601844789285e-06, |
| "loss": 0.6265, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.7073286052009456, |
| "grad_norm": 3.783212423324585, |
| "learning_rate": 4.8498473188780916e-06, |
| "loss": 0.6078, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.7078014184397163, |
| "grad_norm": 2.6656646728515625, |
| "learning_rate": 4.849634306962566e-06, |
| "loss": 0.5756, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.708274231678487, |
| "grad_norm": 2.757141590118408, |
| "learning_rate": 4.849421148745615e-06, |
| "loss": 0.5596, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.7087470449172577, |
| "grad_norm": 3.0391886234283447, |
| "learning_rate": 4.849207844240511e-06, |
| "loss": 0.5293, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.7092198581560284, |
| "grad_norm": 2.981912851333618, |
| "learning_rate": 4.848994393460535e-06, |
| "loss": 0.598, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.7096926713947991, |
| "grad_norm": 2.5470798015594482, |
| "learning_rate": 4.848780796418978e-06, |
| "loss": 0.6266, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.7101654846335698, |
| "grad_norm": 2.8394415378570557, |
| "learning_rate": 4.8485670531291415e-06, |
| "loss": 0.6844, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.7106382978723405, |
| "grad_norm": 3.2023508548736572, |
| "learning_rate": 4.848353163604331e-06, |
| "loss": 0.6134, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "grad_norm": 2.98245906829834, |
| "learning_rate": 4.848139127857867e-06, |
| "loss": 0.7084, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.7115839243498818, |
| "grad_norm": 2.5917441844940186, |
| "learning_rate": 4.847924945903076e-06, |
| "loss": 0.5676, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.7120567375886525, |
| "grad_norm": 2.8736681938171387, |
| "learning_rate": 4.847710617753294e-06, |
| "loss": 0.6304, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.7125295508274232, |
| "grad_norm": 2.7832682132720947, |
| "learning_rate": 4.847496143421866e-06, |
| "loss": 0.5705, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.7130023640661939, |
| "grad_norm": 2.480560779571533, |
| "learning_rate": 4.847281522922147e-06, |
| "loss": 0.5595, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.7134751773049646, |
| "grad_norm": 2.357675313949585, |
| "learning_rate": 4.847066756267499e-06, |
| "loss": 0.5065, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.7139479905437353, |
| "grad_norm": 2.632669448852539, |
| "learning_rate": 4.846851843471296e-06, |
| "loss": 0.6949, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.7144208037825059, |
| "grad_norm": 2.7691073417663574, |
| "learning_rate": 4.84663678454692e-06, |
| "loss": 0.6638, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.7148936170212766, |
| "grad_norm": 2.5647685527801514, |
| "learning_rate": 4.846421579507761e-06, |
| "loss": 0.6098, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.7153664302600473, |
| "grad_norm": 2.476701021194458, |
| "learning_rate": 4.846206228367218e-06, |
| "loss": 0.592, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.715839243498818, |
| "grad_norm": 2.805727958679199, |
| "learning_rate": 4.845990731138702e-06, |
| "loss": 0.5466, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.7163120567375887, |
| "grad_norm": 2.551392078399658, |
| "learning_rate": 4.84577508783563e-06, |
| "loss": 0.6039, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.7167848699763594, |
| "grad_norm": 2.6861350536346436, |
| "learning_rate": 4.845559298471429e-06, |
| "loss": 0.6427, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.7172576832151301, |
| "grad_norm": 3.1908371448516846, |
| "learning_rate": 4.845343363059535e-06, |
| "loss": 0.5447, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.7177304964539007, |
| "grad_norm": 2.9021761417388916, |
| "learning_rate": 4.845127281613394e-06, |
| "loss": 0.5836, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.7182033096926714, |
| "grad_norm": 2.476670742034912, |
| "learning_rate": 4.844911054146461e-06, |
| "loss": 0.5863, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.7186761229314421, |
| "grad_norm": 2.662935495376587, |
| "learning_rate": 4.844694680672198e-06, |
| "loss": 0.5678, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.7191489361702128, |
| "grad_norm": 2.677896738052368, |
| "learning_rate": 4.844478161204079e-06, |
| "loss": 0.6195, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.7196217494089835, |
| "grad_norm": 2.781921863555908, |
| "learning_rate": 4.844261495755585e-06, |
| "loss": 0.643, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.7200945626477542, |
| "grad_norm": 3.0157392024993896, |
| "learning_rate": 4.844044684340206e-06, |
| "loss": 0.7559, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.7205673758865249, |
| "grad_norm": 2.8109354972839355, |
| "learning_rate": 4.843827726971444e-06, |
| "loss": 0.6264, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.7210401891252955, |
| "grad_norm": 3.0953569412231445, |
| "learning_rate": 4.8436106236628064e-06, |
| "loss": 0.6429, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.7215130023640662, |
| "grad_norm": 2.6850643157958984, |
| "learning_rate": 4.843393374427812e-06, |
| "loss": 0.6598, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.7219858156028369, |
| "grad_norm": 3.043480634689331, |
| "learning_rate": 4.8431759792799874e-06, |
| "loss": 0.6331, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.7224586288416076, |
| "grad_norm": 2.723870038986206, |
| "learning_rate": 4.842958438232868e-06, |
| "loss": 0.6259, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.7229314420803783, |
| "grad_norm": 2.822492837905884, |
| "learning_rate": 4.842740751300002e-06, |
| "loss": 0.6554, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.723404255319149, |
| "grad_norm": 2.7866315841674805, |
| "learning_rate": 4.842522918494941e-06, |
| "loss": 0.6991, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.7238770685579197, |
| "grad_norm": 2.8881826400756836, |
| "learning_rate": 4.84230493983125e-06, |
| "loss": 0.5876, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.7243498817966904, |
| "grad_norm": 2.7456939220428467, |
| "learning_rate": 4.8420868153225e-06, |
| "loss": 0.6188, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.724822695035461, |
| "grad_norm": 3.0257532596588135, |
| "learning_rate": 4.841868544982274e-06, |
| "loss": 0.63, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.7252955082742317, |
| "grad_norm": 3.1581954956054688, |
| "learning_rate": 4.841650128824164e-06, |
| "loss": 0.7214, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.7257683215130024, |
| "grad_norm": 2.9174306392669678, |
| "learning_rate": 4.841431566861767e-06, |
| "loss": 0.704, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.7262411347517731, |
| "grad_norm": 2.5019054412841797, |
| "learning_rate": 4.8412128591086935e-06, |
| "loss": 0.6298, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.7267139479905438, |
| "grad_norm": 2.724285125732422, |
| "learning_rate": 4.840994005578562e-06, |
| "loss": 0.6289, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.7271867612293145, |
| "grad_norm": 2.5882341861724854, |
| "learning_rate": 4.840775006284998e-06, |
| "loss": 0.6355, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.7276595744680852, |
| "grad_norm": 3.1281991004943848, |
| "learning_rate": 4.840555861241638e-06, |
| "loss": 0.5551, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.7281323877068558, |
| "grad_norm": 2.6064817905426025, |
| "learning_rate": 4.840336570462127e-06, |
| "loss": 0.5543, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.7286052009456265, |
| "grad_norm": 2.67112398147583, |
| "learning_rate": 4.840117133960122e-06, |
| "loss": 0.6044, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.7290780141843972, |
| "grad_norm": 2.838022232055664, |
| "learning_rate": 4.839897551749282e-06, |
| "loss": 0.6814, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.7295508274231679, |
| "grad_norm": 2.8897151947021484, |
| "learning_rate": 4.839677823843283e-06, |
| "loss": 0.593, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.7300236406619386, |
| "grad_norm": 2.9238014221191406, |
| "learning_rate": 4.839457950255805e-06, |
| "loss": 0.5544, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.7304964539007093, |
| "grad_norm": 3.016876459121704, |
| "learning_rate": 4.839237931000538e-06, |
| "loss": 0.6099, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.7309692671394799, |
| "grad_norm": 2.9415392875671387, |
| "learning_rate": 4.839017766091182e-06, |
| "loss": 0.6413, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.7314420803782505, |
| "grad_norm": 2.658067226409912, |
| "learning_rate": 4.838797455541446e-06, |
| "loss": 0.6534, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.7319148936170212, |
| "grad_norm": 2.460358142852783, |
| "learning_rate": 4.838576999365049e-06, |
| "loss": 0.5307, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.7323877068557919, |
| "grad_norm": 2.5818674564361572, |
| "learning_rate": 4.838356397575716e-06, |
| "loss": 0.6265, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.7328605200945626, |
| "grad_norm": 3.009197473526001, |
| "learning_rate": 4.838135650187183e-06, |
| "loss": 0.6957, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.7333333333333333, |
| "grad_norm": 2.738543748855591, |
| "learning_rate": 4.837914757213196e-06, |
| "loss": 0.646, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.733806146572104, |
| "grad_norm": 2.8208494186401367, |
| "learning_rate": 4.837693718667508e-06, |
| "loss": 0.5936, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.7342789598108747, |
| "grad_norm": 3.1574649810791016, |
| "learning_rate": 4.837472534563883e-06, |
| "loss": 0.6455, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.7347517730496453, |
| "grad_norm": 2.6737420558929443, |
| "learning_rate": 4.837251204916093e-06, |
| "loss": 0.5921, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.735224586288416, |
| "grad_norm": 2.424983024597168, |
| "learning_rate": 4.837029729737918e-06, |
| "loss": 0.6346, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.7356973995271867, |
| "grad_norm": 2.5163493156433105, |
| "learning_rate": 4.836808109043151e-06, |
| "loss": 0.6061, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.7361702127659574, |
| "grad_norm": 2.8377044200897217, |
| "learning_rate": 4.836586342845588e-06, |
| "loss": 0.611, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.7366430260047281, |
| "grad_norm": 2.5929181575775146, |
| "learning_rate": 4.83636443115904e-06, |
| "loss": 0.5496, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.7371158392434988, |
| "grad_norm": 2.5017223358154297, |
| "learning_rate": 4.836142373997323e-06, |
| "loss": 0.6235, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.7375886524822695, |
| "grad_norm": 2.822500228881836, |
| "learning_rate": 4.835920171374265e-06, |
| "loss": 0.6147, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.7380614657210401, |
| "grad_norm": 2.7234230041503906, |
| "learning_rate": 4.8356978233037e-06, |
| "loss": 0.6228, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.7385342789598108, |
| "grad_norm": 2.9565515518188477, |
| "learning_rate": 4.835475329799472e-06, |
| "loss": 0.5728, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.7390070921985815, |
| "grad_norm": 2.4356038570404053, |
| "learning_rate": 4.835252690875438e-06, |
| "loss": 0.6723, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.7394799054373522, |
| "grad_norm": 2.765913248062134, |
| "learning_rate": 4.835029906545458e-06, |
| "loss": 0.5805, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.7399527186761229, |
| "grad_norm": 2.4481914043426514, |
| "learning_rate": 4.834806976823405e-06, |
| "loss": 0.599, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.7404255319148936, |
| "grad_norm": 2.620779514312744, |
| "learning_rate": 4.834583901723158e-06, |
| "loss": 0.63, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.7408983451536643, |
| "grad_norm": 2.654426097869873, |
| "learning_rate": 4.83436068125861e-06, |
| "loss": 0.6544, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.741371158392435, |
| "grad_norm": 2.589623212814331, |
| "learning_rate": 4.834137315443656e-06, |
| "loss": 0.5596, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.7418439716312056, |
| "grad_norm": 2.572883129119873, |
| "learning_rate": 4.833913804292209e-06, |
| "loss": 0.5974, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.7423167848699763, |
| "grad_norm": 2.8744914531707764, |
| "learning_rate": 4.833690147818181e-06, |
| "loss": 0.5364, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.742789598108747, |
| "grad_norm": 2.9800851345062256, |
| "learning_rate": 4.833466346035502e-06, |
| "loss": 0.6287, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.7432624113475177, |
| "grad_norm": 2.627784490585327, |
| "learning_rate": 4.833242398958105e-06, |
| "loss": 0.621, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.7437352245862884, |
| "grad_norm": 2.5187721252441406, |
| "learning_rate": 4.833018306599933e-06, |
| "loss": 0.5901, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.7442080378250591, |
| "grad_norm": 2.4843688011169434, |
| "learning_rate": 4.832794068974944e-06, |
| "loss": 0.6336, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.7446808510638298, |
| "grad_norm": 2.774911880493164, |
| "learning_rate": 4.832569686097096e-06, |
| "loss": 0.6091, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.7451536643026004, |
| "grad_norm": 3.2562527656555176, |
| "learning_rate": 4.8323451579803615e-06, |
| "loss": 0.7686, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.7456264775413711, |
| "grad_norm": 2.799570083618164, |
| "learning_rate": 4.832120484638721e-06, |
| "loss": 0.6233, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.7460992907801418, |
| "grad_norm": 2.661893367767334, |
| "learning_rate": 4.831895666086164e-06, |
| "loss": 0.5841, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.7465721040189125, |
| "grad_norm": 3.0382652282714844, |
| "learning_rate": 4.831670702336689e-06, |
| "loss": 0.5769, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.7470449172576832, |
| "grad_norm": 2.676398515701294, |
| "learning_rate": 4.831445593404304e-06, |
| "loss": 0.619, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.7475177304964539, |
| "grad_norm": 2.717916965484619, |
| "learning_rate": 4.831220339303024e-06, |
| "loss": 0.5787, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.7479905437352246, |
| "grad_norm": 2.3918066024780273, |
| "learning_rate": 4.830994940046876e-06, |
| "loss": 0.5108, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.7484633569739952, |
| "grad_norm": 2.709144115447998, |
| "learning_rate": 4.830769395649895e-06, |
| "loss": 0.6875, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.7489361702127659, |
| "grad_norm": 2.8711116313934326, |
| "learning_rate": 4.830543706126123e-06, |
| "loss": 0.6745, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.7494089834515366, |
| "grad_norm": 2.612339496612549, |
| "learning_rate": 4.830317871489614e-06, |
| "loss": 0.5738, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.7498817966903073, |
| "grad_norm": 2.4355857372283936, |
| "learning_rate": 4.830091891754429e-06, |
| "loss": 0.5907, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.750354609929078, |
| "grad_norm": 2.676051378250122, |
| "learning_rate": 4.829865766934638e-06, |
| "loss": 0.6628, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.7508274231678487, |
| "grad_norm": 2.66489839553833, |
| "learning_rate": 4.829639497044323e-06, |
| "loss": 0.5984, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.7513002364066194, |
| "grad_norm": 2.5358035564422607, |
| "learning_rate": 4.829413082097572e-06, |
| "loss": 0.5867, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.75177304964539, |
| "grad_norm": 2.6530144214630127, |
| "learning_rate": 4.8291865221084815e-06, |
| "loss": 0.5917, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.7522458628841607, |
| "grad_norm": 2.5160958766937256, |
| "learning_rate": 4.82895981709116e-06, |
| "loss": 0.6347, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.7527186761229314, |
| "grad_norm": 2.61592698097229, |
| "learning_rate": 4.8287329670597225e-06, |
| "loss": 0.5472, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.7531914893617021, |
| "grad_norm": 2.7528622150421143, |
| "learning_rate": 4.828505972028296e-06, |
| "loss": 0.5842, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.7536643026004728, |
| "grad_norm": 2.8154072761535645, |
| "learning_rate": 4.828278832011011e-06, |
| "loss": 0.5757, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.7541371158392435, |
| "grad_norm": 3.118515729904175, |
| "learning_rate": 4.828051547022013e-06, |
| "loss": 0.6472, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.7546099290780142, |
| "grad_norm": 2.452033758163452, |
| "learning_rate": 4.827824117075453e-06, |
| "loss": 0.5571, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.7550827423167848, |
| "grad_norm": 2.984388828277588, |
| "learning_rate": 4.827596542185492e-06, |
| "loss": 0.6656, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.7555555555555555, |
| "grad_norm": 2.61356782913208, |
| "learning_rate": 4.8273688223663014e-06, |
| "loss": 0.6444, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.7560283687943262, |
| "grad_norm": 2.8967196941375732, |
| "learning_rate": 4.8271409576320595e-06, |
| "loss": 0.6457, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.7565011820330969, |
| "grad_norm": 2.852367639541626, |
| "learning_rate": 4.826912947996954e-06, |
| "loss": 0.5629, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.7569739952718676, |
| "grad_norm": 2.905280590057373, |
| "learning_rate": 4.826684793475182e-06, |
| "loss": 0.6245, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.7574468085106383, |
| "grad_norm": 2.6156530380249023, |
| "learning_rate": 4.826456494080951e-06, |
| "loss": 0.5869, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.757919621749409, |
| "grad_norm": 2.6490228176116943, |
| "learning_rate": 4.826228049828475e-06, |
| "loss": 0.5461, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.7583924349881797, |
| "grad_norm": 2.9626693725585938, |
| "learning_rate": 4.825999460731978e-06, |
| "loss": 0.6842, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.7588652482269503, |
| "grad_norm": 2.6866023540496826, |
| "learning_rate": 4.825770726805695e-06, |
| "loss": 0.5726, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.759338061465721, |
| "grad_norm": 2.5525858402252197, |
| "learning_rate": 4.825541848063866e-06, |
| "loss": 0.6061, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.7598108747044917, |
| "grad_norm": 2.703977584838867, |
| "learning_rate": 4.825312824520743e-06, |
| "loss": 0.6726, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.7602836879432624, |
| "grad_norm": 2.856534957885742, |
| "learning_rate": 4.825083656190588e-06, |
| "loss": 0.625, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.7607565011820331, |
| "grad_norm": 2.8564887046813965, |
| "learning_rate": 4.824854343087668e-06, |
| "loss": 0.7251, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.7612293144208038, |
| "grad_norm": 2.327650308609009, |
| "learning_rate": 4.824624885226262e-06, |
| "loss": 0.526, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.7617021276595745, |
| "grad_norm": 3.0025737285614014, |
| "learning_rate": 4.824395282620659e-06, |
| "loss": 0.6043, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.7621749408983451, |
| "grad_norm": 2.5441737174987793, |
| "learning_rate": 4.824165535285152e-06, |
| "loss": 0.6276, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.7626477541371158, |
| "grad_norm": 2.4177372455596924, |
| "learning_rate": 4.823935643234049e-06, |
| "loss": 0.6419, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.7631205673758865, |
| "grad_norm": 2.9210550785064697, |
| "learning_rate": 4.823705606481664e-06, |
| "loss": 0.5663, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.7635933806146572, |
| "grad_norm": 2.6353724002838135, |
| "learning_rate": 4.82347542504232e-06, |
| "loss": 0.5669, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.7640661938534279, |
| "grad_norm": 2.419081926345825, |
| "learning_rate": 4.823245098930349e-06, |
| "loss": 0.5777, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.7645390070921986, |
| "grad_norm": 2.5077571868896484, |
| "learning_rate": 4.823014628160093e-06, |
| "loss": 0.5924, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.7650118203309693, |
| "grad_norm": 2.816056251525879, |
| "learning_rate": 4.822784012745902e-06, |
| "loss": 0.7273, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.76548463356974, |
| "grad_norm": 2.7163147926330566, |
| "learning_rate": 4.8225532527021366e-06, |
| "loss": 0.5545, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.7659574468085106, |
| "grad_norm": 2.4784302711486816, |
| "learning_rate": 4.822322348043164e-06, |
| "loss": 0.556, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.7664302600472813, |
| "grad_norm": 2.712467670440674, |
| "learning_rate": 4.822091298783361e-06, |
| "loss": 0.6501, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.766903073286052, |
| "grad_norm": 2.7217724323272705, |
| "learning_rate": 4.821860104937115e-06, |
| "loss": 0.5989, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.7673758865248227, |
| "grad_norm": 2.5622854232788086, |
| "learning_rate": 4.821628766518821e-06, |
| "loss": 0.5263, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.7678486997635934, |
| "grad_norm": 3.230923891067505, |
| "learning_rate": 4.821397283542884e-06, |
| "loss": 0.6707, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.7683215130023641, |
| "grad_norm": 2.37929105758667, |
| "learning_rate": 4.821165656023718e-06, |
| "loss": 0.6124, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.7687943262411348, |
| "grad_norm": 2.9811325073242188, |
| "learning_rate": 4.820933883975745e-06, |
| "loss": 0.6435, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.7692671394799054, |
| "grad_norm": 2.887380838394165, |
| "learning_rate": 4.820701967413395e-06, |
| "loss": 0.621, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.7697399527186761, |
| "grad_norm": 2.6762876510620117, |
| "learning_rate": 4.820469906351109e-06, |
| "loss": 0.5713, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.7702127659574468, |
| "grad_norm": 2.7347512245178223, |
| "learning_rate": 4.820237700803337e-06, |
| "loss": 0.6136, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.7706855791962175, |
| "grad_norm": 2.7244746685028076, |
| "learning_rate": 4.820005350784539e-06, |
| "loss": 0.5816, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.7711583924349882, |
| "grad_norm": 2.9293999671936035, |
| "learning_rate": 4.8197728563091795e-06, |
| "loss": 0.6649, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.7716312056737589, |
| "grad_norm": 2.4402127265930176, |
| "learning_rate": 4.819540217391736e-06, |
| "loss": 0.6481, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.7721040189125296, |
| "grad_norm": 3.083941698074341, |
| "learning_rate": 4.819307434046694e-06, |
| "loss": 0.6951, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.7725768321513002, |
| "grad_norm": 2.544952392578125, |
| "learning_rate": 4.819074506288548e-06, |
| "loss": 0.539, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.7730496453900709, |
| "grad_norm": 2.7791268825531006, |
| "learning_rate": 4.818841434131801e-06, |
| "loss": 0.5827, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.7735224586288416, |
| "grad_norm": 2.7349796295166016, |
| "learning_rate": 4.818608217590967e-06, |
| "loss": 0.5584, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.7739952718676123, |
| "grad_norm": 2.637652635574341, |
| "learning_rate": 4.818374856680565e-06, |
| "loss": 0.6386, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.774468085106383, |
| "grad_norm": 2.9821584224700928, |
| "learning_rate": 4.818141351415127e-06, |
| "loss": 0.6734, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.7749408983451537, |
| "grad_norm": 2.992938995361328, |
| "learning_rate": 4.817907701809192e-06, |
| "loss": 0.5899, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.7754137115839244, |
| "grad_norm": 4.35719633102417, |
| "learning_rate": 4.8176739078773076e-06, |
| "loss": 0.6281, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.775886524822695, |
| "grad_norm": 2.838146209716797, |
| "learning_rate": 4.8174399696340315e-06, |
| "loss": 0.5766, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.7763593380614657, |
| "grad_norm": 3.3116989135742188, |
| "learning_rate": 4.81720588709393e-06, |
| "loss": 0.6409, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.7768321513002364, |
| "grad_norm": 2.9843590259552, |
| "learning_rate": 4.816971660271579e-06, |
| "loss": 0.6108, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.7773049645390071, |
| "grad_norm": 2.843770742416382, |
| "learning_rate": 4.816737289181562e-06, |
| "loss": 0.6053, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.7777777777777778, |
| "grad_norm": 2.7608556747436523, |
| "learning_rate": 4.816502773838473e-06, |
| "loss": 0.5854, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.7782505910165485, |
| "grad_norm": 3.343682289123535, |
| "learning_rate": 4.816268114256914e-06, |
| "loss": 0.6329, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.7787234042553192, |
| "grad_norm": 2.769768476486206, |
| "learning_rate": 4.816033310451496e-06, |
| "loss": 0.6242, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.7791962174940898, |
| "grad_norm": 2.989851713180542, |
| "learning_rate": 4.815798362436838e-06, |
| "loss": 0.6493, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.7796690307328605, |
| "grad_norm": 3.170736312866211, |
| "learning_rate": 4.8155632702275716e-06, |
| "loss": 0.6341, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.7801418439716312, |
| "grad_norm": 2.7372522354125977, |
| "learning_rate": 4.815328033838334e-06, |
| "loss": 0.5445, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.7806146572104019, |
| "grad_norm": 2.6947238445281982, |
| "learning_rate": 4.8150926532837715e-06, |
| "loss": 0.6437, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.7810874704491726, |
| "grad_norm": 2.472323179244995, |
| "learning_rate": 4.81485712857854e-06, |
| "loss": 0.5751, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.7815602836879433, |
| "grad_norm": 2.791114091873169, |
| "learning_rate": 4.814621459737308e-06, |
| "loss": 0.5996, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.782033096926714, |
| "grad_norm": 3.1957521438598633, |
| "learning_rate": 4.814385646774745e-06, |
| "loss": 0.5803, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.7825059101654847, |
| "grad_norm": 2.4120798110961914, |
| "learning_rate": 4.8141496897055364e-06, |
| "loss": 0.5814, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.7829787234042553, |
| "grad_norm": 2.9262423515319824, |
| "learning_rate": 4.813913588544374e-06, |
| "loss": 0.6292, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.783451536643026, |
| "grad_norm": 2.8251047134399414, |
| "learning_rate": 4.813677343305959e-06, |
| "loss": 0.6787, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.7839243498817967, |
| "grad_norm": 2.931659698486328, |
| "learning_rate": 4.8134409540050005e-06, |
| "loss": 0.6163, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.7843971631205674, |
| "grad_norm": 2.7160706520080566, |
| "learning_rate": 4.813204420656219e-06, |
| "loss": 0.6831, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.7848699763593381, |
| "grad_norm": 3.2134454250335693, |
| "learning_rate": 4.81296774327434e-06, |
| "loss": 0.6002, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.7853427895981088, |
| "grad_norm": 2.4002513885498047, |
| "learning_rate": 4.812730921874103e-06, |
| "loss": 0.5488, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.7858156028368795, |
| "grad_norm": 2.5559282302856445, |
| "learning_rate": 4.812493956470251e-06, |
| "loss": 0.5802, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.7862884160756501, |
| "grad_norm": 2.57478404045105, |
| "learning_rate": 4.812256847077541e-06, |
| "loss": 0.646, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.7867612293144208, |
| "grad_norm": 2.811851978302002, |
| "learning_rate": 4.812019593710736e-06, |
| "loss": 0.6245, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.7872340425531915, |
| "grad_norm": 2.5228829383850098, |
| "learning_rate": 4.811782196384609e-06, |
| "loss": 0.5949, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.7877068557919622, |
| "grad_norm": 2.744096040725708, |
| "learning_rate": 4.8115446551139415e-06, |
| "loss": 0.6006, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.7881796690307329, |
| "grad_norm": 3.129242420196533, |
| "learning_rate": 4.811306969913524e-06, |
| "loss": 0.7251, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.7886524822695036, |
| "grad_norm": 2.7855660915374756, |
| "learning_rate": 4.811069140798156e-06, |
| "loss": 0.6534, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.7891252955082743, |
| "grad_norm": 2.836603879928589, |
| "learning_rate": 4.810831167782647e-06, |
| "loss": 0.6661, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.789598108747045, |
| "grad_norm": 2.5339887142181396, |
| "learning_rate": 4.810593050881813e-06, |
| "loss": 0.5354, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.7900709219858156, |
| "grad_norm": 2.9553709030151367, |
| "learning_rate": 4.810354790110482e-06, |
| "loss": 0.6001, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.7905437352245863, |
| "grad_norm": 2.6581788063049316, |
| "learning_rate": 4.8101163854834885e-06, |
| "loss": 0.6802, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.791016548463357, |
| "grad_norm": 3.2002551555633545, |
| "learning_rate": 4.809877837015677e-06, |
| "loss": 0.6641, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.7914893617021277, |
| "grad_norm": 2.918792963027954, |
| "learning_rate": 4.809639144721902e-06, |
| "loss": 0.6758, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.7919621749408984, |
| "grad_norm": 2.7993946075439453, |
| "learning_rate": 4.8094003086170245e-06, |
| "loss": 0.5889, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.7924349881796691, |
| "grad_norm": 2.3698952198028564, |
| "learning_rate": 4.809161328715916e-06, |
| "loss": 0.6244, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.7929078014184398, |
| "grad_norm": 2.8891594409942627, |
| "learning_rate": 4.808922205033458e-06, |
| "loss": 0.5835, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.7933806146572104, |
| "grad_norm": 2.838345766067505, |
| "learning_rate": 4.808682937584537e-06, |
| "loss": 0.6907, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.7938534278959811, |
| "grad_norm": 2.8443174362182617, |
| "learning_rate": 4.808443526384053e-06, |
| "loss": 0.6692, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.7943262411347518, |
| "grad_norm": 2.7355034351348877, |
| "learning_rate": 4.808203971446913e-06, |
| "loss": 0.5799, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.7947990543735225, |
| "grad_norm": 2.7108020782470703, |
| "learning_rate": 4.807964272788033e-06, |
| "loss": 0.652, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.7952718676122932, |
| "grad_norm": 2.397650957107544, |
| "learning_rate": 4.807724430422338e-06, |
| "loss": 0.5418, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.7957446808510639, |
| "grad_norm": 2.4981582164764404, |
| "learning_rate": 4.807484444364762e-06, |
| "loss": 0.5731, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.7962174940898346, |
| "grad_norm": 2.7943713665008545, |
| "learning_rate": 4.8072443146302475e-06, |
| "loss": 0.5913, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.7966903073286052, |
| "grad_norm": 2.5691423416137695, |
| "learning_rate": 4.807004041233746e-06, |
| "loss": 0.6475, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.7971631205673759, |
| "grad_norm": 3.2367498874664307, |
| "learning_rate": 4.8067636241902195e-06, |
| "loss": 0.675, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.7976359338061466, |
| "grad_norm": 3.000595808029175, |
| "learning_rate": 4.806523063514637e-06, |
| "loss": 0.5481, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.7981087470449173, |
| "grad_norm": 2.702014207839966, |
| "learning_rate": 4.806282359221976e-06, |
| "loss": 0.5993, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.798581560283688, |
| "grad_norm": 2.383671998977661, |
| "learning_rate": 4.806041511327226e-06, |
| "loss": 0.562, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.7990543735224587, |
| "grad_norm": 2.6965041160583496, |
| "learning_rate": 4.8058005198453834e-06, |
| "loss": 0.5955, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.7995271867612294, |
| "grad_norm": 2.5906765460968018, |
| "learning_rate": 4.805559384791453e-06, |
| "loss": 0.5151, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 2.5454652309417725, |
| "learning_rate": 4.8053181061804475e-06, |
| "loss": 0.5843, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.8004728132387707, |
| "grad_norm": 2.661343812942505, |
| "learning_rate": 4.8050766840273935e-06, |
| "loss": 0.5995, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.8009456264775414, |
| "grad_norm": 2.5635924339294434, |
| "learning_rate": 4.8048351183473215e-06, |
| "loss": 0.5676, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.8014184397163121, |
| "grad_norm": 2.5936667919158936, |
| "learning_rate": 4.804593409155274e-06, |
| "loss": 0.6291, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.8018912529550828, |
| "grad_norm": 2.6902432441711426, |
| "learning_rate": 4.804351556466299e-06, |
| "loss": 0.6114, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.8023640661938535, |
| "grad_norm": 2.7764673233032227, |
| "learning_rate": 4.804109560295457e-06, |
| "loss": 0.5768, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.8028368794326242, |
| "grad_norm": 2.9587221145629883, |
| "learning_rate": 4.803867420657816e-06, |
| "loss": 0.6048, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.8033096926713948, |
| "grad_norm": 2.9238998889923096, |
| "learning_rate": 4.803625137568453e-06, |
| "loss": 0.6329, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.8037825059101655, |
| "grad_norm": 2.70473313331604, |
| "learning_rate": 4.803382711042455e-06, |
| "loss": 0.5427, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.8042553191489362, |
| "grad_norm": 3.1604979038238525, |
| "learning_rate": 4.803140141094914e-06, |
| "loss": 0.626, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.8047281323877069, |
| "grad_norm": 2.9567699432373047, |
| "learning_rate": 4.802897427740936e-06, |
| "loss": 0.5319, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.8052009456264776, |
| "grad_norm": 2.90983247756958, |
| "learning_rate": 4.802654570995632e-06, |
| "loss": 0.586, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.8056737588652483, |
| "grad_norm": 2.783480167388916, |
| "learning_rate": 4.8024115708741255e-06, |
| "loss": 0.5773, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.806146572104019, |
| "grad_norm": 3.3307793140411377, |
| "learning_rate": 4.802168427391547e-06, |
| "loss": 0.6257, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.8066193853427897, |
| "grad_norm": 3.0475001335144043, |
| "learning_rate": 4.801925140563034e-06, |
| "loss": 0.6612, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.8070921985815603, |
| "grad_norm": 2.8278894424438477, |
| "learning_rate": 4.8016817104037375e-06, |
| "loss": 0.6449, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.807565011820331, |
| "grad_norm": 2.760244369506836, |
| "learning_rate": 4.801438136928812e-06, |
| "loss": 0.7007, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.8080378250591016, |
| "grad_norm": 2.827634572982788, |
| "learning_rate": 4.801194420153427e-06, |
| "loss": 0.6418, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.8085106382978723, |
| "grad_norm": 2.8655009269714355, |
| "learning_rate": 4.800950560092754e-06, |
| "loss": 0.6231, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.808983451536643, |
| "grad_norm": 2.738112688064575, |
| "learning_rate": 4.800706556761981e-06, |
| "loss": 0.6463, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.8094562647754137, |
| "grad_norm": 2.4781179428100586, |
| "learning_rate": 4.800462410176296e-06, |
| "loss": 0.5365, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.8099290780141843, |
| "grad_norm": 2.6049838066101074, |
| "learning_rate": 4.800218120350906e-06, |
| "loss": 0.6035, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.810401891252955, |
| "grad_norm": 2.9089980125427246, |
| "learning_rate": 4.79997368730102e-06, |
| "loss": 0.5828, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.8108747044917257, |
| "grad_norm": 2.831871747970581, |
| "learning_rate": 4.799729111041857e-06, |
| "loss": 0.5953, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.8113475177304964, |
| "grad_norm": 2.5611300468444824, |
| "learning_rate": 4.799484391588647e-06, |
| "loss": 0.6302, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.8118203309692671, |
| "grad_norm": 2.744070053100586, |
| "learning_rate": 4.799239528956625e-06, |
| "loss": 0.5561, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.8122931442080378, |
| "grad_norm": 2.7344231605529785, |
| "learning_rate": 4.798994523161041e-06, |
| "loss": 0.6317, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.8127659574468085, |
| "grad_norm": 2.3420889377593994, |
| "learning_rate": 4.798749374217149e-06, |
| "loss": 0.5415, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.8132387706855791, |
| "grad_norm": 2.57384991645813, |
| "learning_rate": 4.798504082140212e-06, |
| "loss": 0.6383, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.8137115839243498, |
| "grad_norm": 2.8819844722747803, |
| "learning_rate": 4.798258646945505e-06, |
| "loss": 0.6355, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.8141843971631205, |
| "grad_norm": 2.908123254776001, |
| "learning_rate": 4.79801306864831e-06, |
| "loss": 0.701, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.8146572104018912, |
| "grad_norm": 2.6500701904296875, |
| "learning_rate": 4.797767347263917e-06, |
| "loss": 0.6152, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.8151300236406619, |
| "grad_norm": 2.5513017177581787, |
| "learning_rate": 4.797521482807628e-06, |
| "loss": 0.6241, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.8156028368794326, |
| "grad_norm": 2.6239185333251953, |
| "learning_rate": 4.7972754752947495e-06, |
| "loss": 0.6072, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.8160756501182033, |
| "grad_norm": 2.673436403274536, |
| "learning_rate": 4.797029324740601e-06, |
| "loss": 0.5802, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.816548463356974, |
| "grad_norm": 2.533831834793091, |
| "learning_rate": 4.796783031160508e-06, |
| "loss": 0.5566, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.8170212765957446, |
| "grad_norm": 2.9806582927703857, |
| "learning_rate": 4.796536594569807e-06, |
| "loss": 0.6945, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.8174940898345153, |
| "grad_norm": 2.7093560695648193, |
| "learning_rate": 4.796290014983842e-06, |
| "loss": 0.7143, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.817966903073286, |
| "grad_norm": 2.814507246017456, |
| "learning_rate": 4.796043292417967e-06, |
| "loss": 0.6122, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.8184397163120567, |
| "grad_norm": 2.537156820297241, |
| "learning_rate": 4.795796426887543e-06, |
| "loss": 0.6229, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.8189125295508274, |
| "grad_norm": 2.4878013134002686, |
| "learning_rate": 4.795549418407944e-06, |
| "loss": 0.5442, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.8193853427895981, |
| "grad_norm": 2.839383363723755, |
| "learning_rate": 4.795302266994548e-06, |
| "loss": 0.6717, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.8198581560283688, |
| "grad_norm": 3.1981801986694336, |
| "learning_rate": 4.795054972662744e-06, |
| "loss": 0.6596, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.8203309692671394, |
| "grad_norm": 2.781730890274048, |
| "learning_rate": 4.79480753542793e-06, |
| "loss": 0.5845, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.8208037825059101, |
| "grad_norm": 2.689948558807373, |
| "learning_rate": 4.794559955305513e-06, |
| "loss": 0.5928, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.8212765957446808, |
| "grad_norm": 2.7267637252807617, |
| "learning_rate": 4.7943122323109105e-06, |
| "loss": 0.5224, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.8217494089834515, |
| "grad_norm": 2.4346601963043213, |
| "learning_rate": 4.794064366459544e-06, |
| "loss": 0.6431, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.8222222222222222, |
| "grad_norm": 2.7440176010131836, |
| "learning_rate": 4.793816357766849e-06, |
| "loss": 0.6083, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.8226950354609929, |
| "grad_norm": 2.6558027267456055, |
| "learning_rate": 4.793568206248268e-06, |
| "loss": 0.698, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.8231678486997636, |
| "grad_norm": 2.591658353805542, |
| "learning_rate": 4.793319911919251e-06, |
| "loss": 0.6601, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.8236406619385342, |
| "grad_norm": 2.5431172847747803, |
| "learning_rate": 4.79307147479526e-06, |
| "loss": 0.5917, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.8241134751773049, |
| "grad_norm": 2.7335588932037354, |
| "learning_rate": 4.792822894891762e-06, |
| "loss": 0.5925, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.8245862884160756, |
| "grad_norm": 2.2500839233398438, |
| "learning_rate": 4.792574172224237e-06, |
| "loss": 0.4984, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.8250591016548463, |
| "grad_norm": 2.691343069076538, |
| "learning_rate": 4.79232530680817e-06, |
| "loss": 0.6262, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.825531914893617, |
| "grad_norm": 2.612204074859619, |
| "learning_rate": 4.792076298659058e-06, |
| "loss": 0.5822, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.8260047281323877, |
| "grad_norm": 3.0163519382476807, |
| "learning_rate": 4.791827147792406e-06, |
| "loss": 0.6263, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.8264775413711584, |
| "grad_norm": 2.742183208465576, |
| "learning_rate": 4.791577854223727e-06, |
| "loss": 0.6628, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.826950354609929, |
| "grad_norm": 2.872213840484619, |
| "learning_rate": 4.791328417968542e-06, |
| "loss": 0.6332, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.8274231678486997, |
| "grad_norm": 2.725006580352783, |
| "learning_rate": 4.7910788390423844e-06, |
| "loss": 0.6266, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.8278959810874704, |
| "grad_norm": 3.0366697311401367, |
| "learning_rate": 4.790829117460793e-06, |
| "loss": 0.6403, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.8283687943262411, |
| "grad_norm": 2.594881772994995, |
| "learning_rate": 4.790579253239318e-06, |
| "loss": 0.521, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.8288416075650118, |
| "grad_norm": 2.4496347904205322, |
| "learning_rate": 4.790329246393517e-06, |
| "loss": 0.54, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.8293144208037825, |
| "grad_norm": 3.102278470993042, |
| "learning_rate": 4.790079096938956e-06, |
| "loss": 0.6142, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.8297872340425532, |
| "grad_norm": 2.4645912647247314, |
| "learning_rate": 4.789828804891212e-06, |
| "loss": 0.5212, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.8302600472813239, |
| "grad_norm": 2.7482516765594482, |
| "learning_rate": 4.789578370265868e-06, |
| "loss": 0.6712, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.8307328605200945, |
| "grad_norm": 2.61360502243042, |
| "learning_rate": 4.7893277930785195e-06, |
| "loss": 0.6367, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.8312056737588652, |
| "grad_norm": 2.79028058052063, |
| "learning_rate": 4.789077073344767e-06, |
| "loss": 0.5099, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.8316784869976359, |
| "grad_norm": 2.647662401199341, |
| "learning_rate": 4.788826211080222e-06, |
| "loss": 0.6698, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.8321513002364066, |
| "grad_norm": 3.0214831829071045, |
| "learning_rate": 4.7885752063005055e-06, |
| "loss": 0.6121, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.8326241134751773, |
| "grad_norm": 2.8244032859802246, |
| "learning_rate": 4.788324059021247e-06, |
| "loss": 0.6921, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.833096926713948, |
| "grad_norm": 3.1501076221466064, |
| "learning_rate": 4.788072769258082e-06, |
| "loss": 0.6872, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.8335697399527187, |
| "grad_norm": 2.6989903450012207, |
| "learning_rate": 4.7878213370266594e-06, |
| "loss": 0.5884, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.8340425531914893, |
| "grad_norm": 2.6982665061950684, |
| "learning_rate": 4.787569762342633e-06, |
| "loss": 0.6112, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.83451536643026, |
| "grad_norm": 2.6918323040008545, |
| "learning_rate": 4.7873180452216685e-06, |
| "loss": 0.5315, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.8349881796690307, |
| "grad_norm": 2.5494401454925537, |
| "learning_rate": 4.78706618567944e-06, |
| "loss": 0.5909, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.8354609929078014, |
| "grad_norm": 2.7532095909118652, |
| "learning_rate": 4.786814183731627e-06, |
| "loss": 0.5566, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.8359338061465721, |
| "grad_norm": 2.550865888595581, |
| "learning_rate": 4.786562039393923e-06, |
| "loss": 0.555, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.8364066193853428, |
| "grad_norm": 2.4477791786193848, |
| "learning_rate": 4.786309752682028e-06, |
| "loss": 0.5844, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.8368794326241135, |
| "grad_norm": 2.6982262134552, |
| "learning_rate": 4.7860573236116485e-06, |
| "loss": 0.6136, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.8373522458628841, |
| "grad_norm": 2.456263542175293, |
| "learning_rate": 4.785804752198503e-06, |
| "loss": 0.5055, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.8378250591016548, |
| "grad_norm": 2.428544521331787, |
| "learning_rate": 4.78555203845832e-06, |
| "loss": 0.5859, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.8382978723404255, |
| "grad_norm": 2.1782307624816895, |
| "learning_rate": 4.785299182406833e-06, |
| "loss": 0.5325, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.8387706855791962, |
| "grad_norm": 3.137956142425537, |
| "learning_rate": 4.785046184059786e-06, |
| "loss": 0.6097, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.8392434988179669, |
| "grad_norm": 2.6269001960754395, |
| "learning_rate": 4.7847930434329336e-06, |
| "loss": 0.5972, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.8397163120567376, |
| "grad_norm": 2.732659339904785, |
| "learning_rate": 4.784539760542037e-06, |
| "loss": 0.6054, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.8401891252955083, |
| "grad_norm": 2.5346736907958984, |
| "learning_rate": 4.784286335402866e-06, |
| "loss": 0.5521, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.840661938534279, |
| "grad_norm": 3.1420228481292725, |
| "learning_rate": 4.784032768031202e-06, |
| "loss": 0.6165, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.8411347517730496, |
| "grad_norm": 3.073793411254883, |
| "learning_rate": 4.783779058442831e-06, |
| "loss": 0.6414, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.8416075650118203, |
| "grad_norm": 2.6621336936950684, |
| "learning_rate": 4.783525206653554e-06, |
| "loss": 0.5836, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.842080378250591, |
| "grad_norm": 2.7029049396514893, |
| "learning_rate": 4.7832712126791745e-06, |
| "loss": 0.5897, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.8425531914893617, |
| "grad_norm": 2.4733822345733643, |
| "learning_rate": 4.783017076535509e-06, |
| "loss": 0.5913, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.8430260047281324, |
| "grad_norm": 2.8119473457336426, |
| "learning_rate": 4.782762798238381e-06, |
| "loss": 0.6105, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.8434988179669031, |
| "grad_norm": 2.5290818214416504, |
| "learning_rate": 4.782508377803622e-06, |
| "loss": 0.6119, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.8439716312056738, |
| "grad_norm": 3.193472385406494, |
| "learning_rate": 4.782253815247076e-06, |
| "loss": 0.6665, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.8444444444444444, |
| "grad_norm": 3.206759452819824, |
| "learning_rate": 4.781999110584592e-06, |
| "loss": 0.6012, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.8449172576832151, |
| "grad_norm": 2.6227457523345947, |
| "learning_rate": 4.781744263832029e-06, |
| "loss": 0.5845, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.8453900709219858, |
| "grad_norm": 2.838365316390991, |
| "learning_rate": 4.781489275005257e-06, |
| "loss": 0.5695, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.8458628841607565, |
| "grad_norm": 2.8348326683044434, |
| "learning_rate": 4.78123414412015e-06, |
| "loss": 0.6136, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.8463356973995272, |
| "grad_norm": 2.5698344707489014, |
| "learning_rate": 4.780978871192597e-06, |
| "loss": 0.6576, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.8468085106382979, |
| "grad_norm": 2.5198330879211426, |
| "learning_rate": 4.780723456238492e-06, |
| "loss": 0.5521, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.8472813238770686, |
| "grad_norm": 3.001325845718384, |
| "learning_rate": 4.780467899273737e-06, |
| "loss": 0.6075, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.8477541371158392, |
| "grad_norm": 2.7732746601104736, |
| "learning_rate": 4.780212200314247e-06, |
| "loss": 0.6245, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.8482269503546099, |
| "grad_norm": 2.6950337886810303, |
| "learning_rate": 4.77995635937594e-06, |
| "loss": 0.5723, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.8486997635933806, |
| "grad_norm": 2.82051420211792, |
| "learning_rate": 4.779700376474749e-06, |
| "loss": 0.6184, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.8491725768321513, |
| "grad_norm": 2.757791757583618, |
| "learning_rate": 4.779444251626611e-06, |
| "loss": 0.608, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.849645390070922, |
| "grad_norm": 2.394108533859253, |
| "learning_rate": 4.779187984847475e-06, |
| "loss": 0.6174, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.8501182033096927, |
| "grad_norm": 2.427562713623047, |
| "learning_rate": 4.778931576153296e-06, |
| "loss": 0.5618, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.8505910165484634, |
| "grad_norm": 2.891268491744995, |
| "learning_rate": 4.778675025560042e-06, |
| "loss": 0.6865, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "grad_norm": 2.665534257888794, |
| "learning_rate": 4.778418333083685e-06, |
| "loss": 0.5852, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.8515366430260047, |
| "grad_norm": 2.5492889881134033, |
| "learning_rate": 4.7781614987402095e-06, |
| "loss": 0.5161, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.8520094562647754, |
| "grad_norm": 2.400177001953125, |
| "learning_rate": 4.777904522545607e-06, |
| "loss": 0.5128, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.8524822695035461, |
| "grad_norm": 2.3949809074401855, |
| "learning_rate": 4.777647404515878e-06, |
| "loss": 0.571, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.8529550827423168, |
| "grad_norm": 2.3624472618103027, |
| "learning_rate": 4.7773901446670325e-06, |
| "loss": 0.5486, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.8534278959810875, |
| "grad_norm": 2.711366891860962, |
| "learning_rate": 4.7771327430150885e-06, |
| "loss": 0.5667, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.8539007092198582, |
| "grad_norm": 2.7681493759155273, |
| "learning_rate": 4.776875199576073e-06, |
| "loss": 0.5686, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.8543735224586289, |
| "grad_norm": 3.0369436740875244, |
| "learning_rate": 4.776617514366023e-06, |
| "loss": 0.6635, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.8548463356973995, |
| "grad_norm": 2.919649600982666, |
| "learning_rate": 4.776359687400983e-06, |
| "loss": 0.5749, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.8553191489361702, |
| "grad_norm": 2.7986185550689697, |
| "learning_rate": 4.776101718697007e-06, |
| "loss": 0.559, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.8557919621749409, |
| "grad_norm": 2.5951223373413086, |
| "learning_rate": 4.775843608270158e-06, |
| "loss": 0.5654, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.8562647754137116, |
| "grad_norm": 2.674138069152832, |
| "learning_rate": 4.775585356136505e-06, |
| "loss": 0.5286, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.8567375886524823, |
| "grad_norm": 3.045437812805176, |
| "learning_rate": 4.775326962312131e-06, |
| "loss": 0.6185, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.857210401891253, |
| "grad_norm": 2.6145293712615967, |
| "learning_rate": 4.775068426813124e-06, |
| "loss": 0.6075, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.8576832151300237, |
| "grad_norm": 2.6320106983184814, |
| "learning_rate": 4.7748097496555824e-06, |
| "loss": 0.561, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.8581560283687943, |
| "grad_norm": 2.5038623809814453, |
| "learning_rate": 4.774550930855612e-06, |
| "loss": 0.593, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.858628841607565, |
| "grad_norm": 2.8168089389801025, |
| "learning_rate": 4.774291970429329e-06, |
| "loss": 0.5196, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.8591016548463357, |
| "grad_norm": 2.778130292892456, |
| "learning_rate": 4.774032868392858e-06, |
| "loss": 0.5984, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.8595744680851064, |
| "grad_norm": 2.536458730697632, |
| "learning_rate": 4.7737736247623305e-06, |
| "loss": 0.568, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.8600472813238771, |
| "grad_norm": 2.6669719219207764, |
| "learning_rate": 4.77351423955389e-06, |
| "loss": 0.6233, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.8605200945626478, |
| "grad_norm": 2.578242540359497, |
| "learning_rate": 4.773254712783687e-06, |
| "loss": 0.579, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.8609929078014185, |
| "grad_norm": 2.816664457321167, |
| "learning_rate": 4.772995044467881e-06, |
| "loss": 0.6635, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.8614657210401891, |
| "grad_norm": 3.1111979484558105, |
| "learning_rate": 4.77273523462264e-06, |
| "loss": 0.6372, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.8619385342789598, |
| "grad_norm": 2.764552354812622, |
| "learning_rate": 4.772475283264142e-06, |
| "loss": 0.6216, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.8624113475177305, |
| "grad_norm": 2.9126830101013184, |
| "learning_rate": 4.772215190408572e-06, |
| "loss": 0.6396, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.8628841607565012, |
| "grad_norm": 2.7502307891845703, |
| "learning_rate": 4.7719549560721264e-06, |
| "loss": 0.6186, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.8633569739952719, |
| "grad_norm": 2.6279006004333496, |
| "learning_rate": 4.771694580271007e-06, |
| "loss": 0.5557, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.8638297872340426, |
| "grad_norm": 2.996563196182251, |
| "learning_rate": 4.7714340630214276e-06, |
| "loss": 0.6259, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.8643026004728133, |
| "grad_norm": 3.231323480606079, |
| "learning_rate": 4.771173404339609e-06, |
| "loss": 0.5473, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.864775413711584, |
| "grad_norm": 3.143519878387451, |
| "learning_rate": 4.770912604241781e-06, |
| "loss": 0.593, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.8652482269503546, |
| "grad_norm": 2.515484094619751, |
| "learning_rate": 4.770651662744184e-06, |
| "loss": 0.538, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.8657210401891253, |
| "grad_norm": 2.629058837890625, |
| "learning_rate": 4.770390579863064e-06, |
| "loss": 0.5745, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.866193853427896, |
| "grad_norm": 2.5826802253723145, |
| "learning_rate": 4.770129355614677e-06, |
| "loss": 0.6397, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.8666666666666667, |
| "grad_norm": 2.954623222351074, |
| "learning_rate": 4.769867990015289e-06, |
| "loss": 0.6106, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.8671394799054374, |
| "grad_norm": 2.742192268371582, |
| "learning_rate": 4.769606483081175e-06, |
| "loss": 0.6902, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.8676122931442081, |
| "grad_norm": 2.2619097232818604, |
| "learning_rate": 4.769344834828618e-06, |
| "loss": 0.5414, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.8680851063829788, |
| "grad_norm": 2.7384188175201416, |
| "learning_rate": 4.769083045273908e-06, |
| "loss": 0.5787, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.8685579196217494, |
| "grad_norm": 2.6734485626220703, |
| "learning_rate": 4.768821114433346e-06, |
| "loss": 0.5923, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.8690307328605201, |
| "grad_norm": 2.286140203475952, |
| "learning_rate": 4.768559042323243e-06, |
| "loss": 0.5822, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.8695035460992908, |
| "grad_norm": 3.0243725776672363, |
| "learning_rate": 4.768296828959915e-06, |
| "loss": 0.6623, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.8699763593380615, |
| "grad_norm": 2.4026312828063965, |
| "learning_rate": 4.768034474359689e-06, |
| "loss": 0.5554, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.8704491725768322, |
| "grad_norm": 2.7469029426574707, |
| "learning_rate": 4.767771978538903e-06, |
| "loss": 0.6316, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.8709219858156029, |
| "grad_norm": 2.729659080505371, |
| "learning_rate": 4.767509341513899e-06, |
| "loss": 0.5807, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.8713947990543736, |
| "grad_norm": 2.5336945056915283, |
| "learning_rate": 4.76724656330103e-06, |
| "loss": 0.6109, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.8718676122931442, |
| "grad_norm": 2.519880533218384, |
| "learning_rate": 4.76698364391666e-06, |
| "loss": 0.5313, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.8723404255319149, |
| "grad_norm": 2.698862075805664, |
| "learning_rate": 4.766720583377159e-06, |
| "loss": 0.5953, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.8728132387706856, |
| "grad_norm": 3.0195560455322266, |
| "learning_rate": 4.766457381698907e-06, |
| "loss": 0.5965, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.8732860520094563, |
| "grad_norm": 2.5972697734832764, |
| "learning_rate": 4.766194038898291e-06, |
| "loss": 0.6014, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.873758865248227, |
| "grad_norm": 2.7132294178009033, |
| "learning_rate": 4.76593055499171e-06, |
| "loss": 0.5638, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.8742316784869977, |
| "grad_norm": 2.7134575843811035, |
| "learning_rate": 4.765666929995568e-06, |
| "loss": 0.52, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.8747044917257684, |
| "grad_norm": 2.3804993629455566, |
| "learning_rate": 4.765403163926282e-06, |
| "loss": 0.5435, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.875177304964539, |
| "grad_norm": 2.8782761096954346, |
| "learning_rate": 4.765139256800274e-06, |
| "loss": 0.5843, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.8756501182033097, |
| "grad_norm": 2.836209774017334, |
| "learning_rate": 4.764875208633977e-06, |
| "loss": 0.6667, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.8761229314420804, |
| "grad_norm": 2.608851194381714, |
| "learning_rate": 4.764611019443831e-06, |
| "loss": 0.5436, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.8765957446808511, |
| "grad_norm": 2.788738965988159, |
| "learning_rate": 4.764346689246288e-06, |
| "loss": 0.7331, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.8770685579196218, |
| "grad_norm": 2.524277687072754, |
| "learning_rate": 4.764082218057805e-06, |
| "loss": 0.5067, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.8775413711583925, |
| "grad_norm": 3.7559316158294678, |
| "learning_rate": 4.763817605894851e-06, |
| "loss": 0.6809, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.8780141843971632, |
| "grad_norm": 2.9070613384246826, |
| "learning_rate": 4.763552852773899e-06, |
| "loss": 0.5913, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.8784869976359339, |
| "grad_norm": 2.7050609588623047, |
| "learning_rate": 4.7632879587114386e-06, |
| "loss": 0.6074, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.8789598108747045, |
| "grad_norm": 2.891134262084961, |
| "learning_rate": 4.76302292372396e-06, |
| "loss": 0.5939, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.8794326241134752, |
| "grad_norm": 2.8581702709198, |
| "learning_rate": 4.762757747827968e-06, |
| "loss": 0.5972, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.8799054373522459, |
| "grad_norm": 2.8266196250915527, |
| "learning_rate": 4.762492431039971e-06, |
| "loss": 0.5993, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.8803782505910166, |
| "grad_norm": 2.4853954315185547, |
| "learning_rate": 4.762226973376493e-06, |
| "loss": 0.6388, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.8808510638297873, |
| "grad_norm": 3.2212886810302734, |
| "learning_rate": 4.761961374854059e-06, |
| "loss": 0.6698, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.881323877068558, |
| "grad_norm": 3.1254501342773438, |
| "learning_rate": 4.761695635489211e-06, |
| "loss": 0.5263, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.8817966903073287, |
| "grad_norm": 2.6891462802886963, |
| "learning_rate": 4.761429755298491e-06, |
| "loss": 0.5359, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.8822695035460993, |
| "grad_norm": 2.8557538986206055, |
| "learning_rate": 4.761163734298457e-06, |
| "loss": 0.5933, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.88274231678487, |
| "grad_norm": 2.53548264503479, |
| "learning_rate": 4.7608975725056724e-06, |
| "loss": 0.6397, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.8832151300236407, |
| "grad_norm": 3.0237956047058105, |
| "learning_rate": 4.76063126993671e-06, |
| "loss": 0.6845, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.8836879432624114, |
| "grad_norm": 3.222886800765991, |
| "learning_rate": 4.76036482660815e-06, |
| "loss": 0.6055, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.8841607565011821, |
| "grad_norm": 3.1867551803588867, |
| "learning_rate": 4.760098242536584e-06, |
| "loss": 0.6592, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.8846335697399527, |
| "grad_norm": 2.782209873199463, |
| "learning_rate": 4.7598315177386115e-06, |
| "loss": 0.5833, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.8851063829787233, |
| "grad_norm": 2.899871587753296, |
| "learning_rate": 4.759564652230838e-06, |
| "loss": 0.6129, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.885579196217494, |
| "grad_norm": 2.5690579414367676, |
| "learning_rate": 4.759297646029882e-06, |
| "loss": 0.5827, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.8860520094562647, |
| "grad_norm": 2.666130304336548, |
| "learning_rate": 4.759030499152368e-06, |
| "loss": 0.5272, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.8865248226950354, |
| "grad_norm": 2.7030911445617676, |
| "learning_rate": 4.758763211614932e-06, |
| "loss": 0.6415, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.8869976359338061, |
| "grad_norm": 2.717512845993042, |
| "learning_rate": 4.7584957834342135e-06, |
| "loss": 0.5827, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.8874704491725768, |
| "grad_norm": 2.665823459625244, |
| "learning_rate": 4.758228214626867e-06, |
| "loss": 0.6209, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.8879432624113475, |
| "grad_norm": 2.636653184890747, |
| "learning_rate": 4.75796050520955e-06, |
| "loss": 0.6413, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.8884160756501182, |
| "grad_norm": 2.585115671157837, |
| "learning_rate": 4.7576926551989345e-06, |
| "loss": 0.5518, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 2.808526039123535, |
| "learning_rate": 4.757424664611697e-06, |
| "loss": 0.5717, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.8893617021276595, |
| "grad_norm": 3.5957939624786377, |
| "learning_rate": 4.757156533464524e-06, |
| "loss": 0.6323, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.8898345153664302, |
| "grad_norm": 2.5003883838653564, |
| "learning_rate": 4.756888261774111e-06, |
| "loss": 0.5937, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.8903073286052009, |
| "grad_norm": 2.749061346054077, |
| "learning_rate": 4.756619849557161e-06, |
| "loss": 0.6642, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.8907801418439716, |
| "grad_norm": 2.6757891178131104, |
| "learning_rate": 4.756351296830389e-06, |
| "loss": 0.5887, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.8912529550827423, |
| "grad_norm": 2.811925172805786, |
| "learning_rate": 4.756082603610516e-06, |
| "loss": 0.6571, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.891725768321513, |
| "grad_norm": 2.5054616928100586, |
| "learning_rate": 4.755813769914271e-06, |
| "loss": 0.6312, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.8921985815602836, |
| "grad_norm": 2.7518467903137207, |
| "learning_rate": 4.755544795758395e-06, |
| "loss": 0.6685, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.8926713947990543, |
| "grad_norm": 2.7527287006378174, |
| "learning_rate": 4.755275681159634e-06, |
| "loss": 0.5886, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.893144208037825, |
| "grad_norm": 2.6162452697753906, |
| "learning_rate": 4.755006426134745e-06, |
| "loss": 0.546, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.8936170212765957, |
| "grad_norm": 2.4016737937927246, |
| "learning_rate": 4.754737030700495e-06, |
| "loss": 0.5726, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.8940898345153664, |
| "grad_norm": 2.528327703475952, |
| "learning_rate": 4.754467494873656e-06, |
| "loss": 0.5682, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.8945626477541371, |
| "grad_norm": 2.3139286041259766, |
| "learning_rate": 4.7541978186710115e-06, |
| "loss": 0.6108, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.8950354609929078, |
| "grad_norm": 2.7269136905670166, |
| "learning_rate": 4.753928002109354e-06, |
| "loss": 0.5875, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.8955082742316784, |
| "grad_norm": 4.425495147705078, |
| "learning_rate": 4.753658045205482e-06, |
| "loss": 0.5572, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.8959810874704491, |
| "grad_norm": 2.535409927368164, |
| "learning_rate": 4.753387947976206e-06, |
| "loss": 0.5868, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.8964539007092198, |
| "grad_norm": 2.722458600997925, |
| "learning_rate": 4.753117710438343e-06, |
| "loss": 0.5935, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.8969267139479905, |
| "grad_norm": 2.743861436843872, |
| "learning_rate": 4.75284733260872e-06, |
| "loss": 0.572, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.8973995271867612, |
| "grad_norm": 2.60640549659729, |
| "learning_rate": 4.752576814504173e-06, |
| "loss": 0.567, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.8978723404255319, |
| "grad_norm": 2.7486042976379395, |
| "learning_rate": 4.7523061561415435e-06, |
| "loss": 0.5768, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.8983451536643026, |
| "grad_norm": 3.8410251140594482, |
| "learning_rate": 4.752035357537686e-06, |
| "loss": 0.6034, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.8988179669030733, |
| "grad_norm": 3.0935890674591064, |
| "learning_rate": 4.751764418709462e-06, |
| "loss": 0.5644, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.8992907801418439, |
| "grad_norm": 2.7989892959594727, |
| "learning_rate": 4.751493339673742e-06, |
| "loss": 0.656, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.8997635933806146, |
| "grad_norm": 3.6940557956695557, |
| "learning_rate": 4.751222120447403e-06, |
| "loss": 0.6632, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.9002364066193853, |
| "grad_norm": 2.3428797721862793, |
| "learning_rate": 4.750950761047335e-06, |
| "loss": 0.4485, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.900709219858156, |
| "grad_norm": 2.622544050216675, |
| "learning_rate": 4.750679261490432e-06, |
| "loss": 0.5857, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.9011820330969267, |
| "grad_norm": 2.4911322593688965, |
| "learning_rate": 4.750407621793601e-06, |
| "loss": 0.5618, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.9016548463356974, |
| "grad_norm": 2.6434662342071533, |
| "learning_rate": 4.750135841973755e-06, |
| "loss": 0.6057, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.902127659574468, |
| "grad_norm": 3.115443706512451, |
| "learning_rate": 4.749863922047817e-06, |
| "loss": 0.6064, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.9026004728132387, |
| "grad_norm": 2.5671091079711914, |
| "learning_rate": 4.749591862032718e-06, |
| "loss": 0.5625, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.9030732860520094, |
| "grad_norm": 3.2008655071258545, |
| "learning_rate": 4.749319661945398e-06, |
| "loss": 0.5547, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.9035460992907801, |
| "grad_norm": 2.905987024307251, |
| "learning_rate": 4.749047321802805e-06, |
| "loss": 0.6033, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.9040189125295508, |
| "grad_norm": 3.1456053256988525, |
| "learning_rate": 4.748774841621897e-06, |
| "loss": 0.5651, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.9044917257683215, |
| "grad_norm": 2.8116416931152344, |
| "learning_rate": 4.748502221419641e-06, |
| "loss": 0.5853, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.9049645390070922, |
| "grad_norm": 3.123835325241089, |
| "learning_rate": 4.748229461213011e-06, |
| "loss": 0.5427, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.9054373522458629, |
| "grad_norm": 2.4750146865844727, |
| "learning_rate": 4.747956561018989e-06, |
| "loss": 0.6517, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.9059101654846335, |
| "grad_norm": 2.6174299716949463, |
| "learning_rate": 4.7476835208545705e-06, |
| "loss": 0.6119, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.9063829787234042, |
| "grad_norm": 2.7390382289886475, |
| "learning_rate": 4.747410340736755e-06, |
| "loss": 0.5664, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.9068557919621749, |
| "grad_norm": 2.7940444946289062, |
| "learning_rate": 4.747137020682552e-06, |
| "loss": 0.5628, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.9073286052009456, |
| "grad_norm": 2.477365016937256, |
| "learning_rate": 4.7468635607089795e-06, |
| "loss": 0.5261, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.9078014184397163, |
| "grad_norm": 2.7016685009002686, |
| "learning_rate": 4.746589960833066e-06, |
| "loss": 0.5576, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.908274231678487, |
| "grad_norm": 2.8806519508361816, |
| "learning_rate": 4.746316221071846e-06, |
| "loss": 0.5925, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.9087470449172577, |
| "grad_norm": 3.0315234661102295, |
| "learning_rate": 4.746042341442365e-06, |
| "loss": 0.6142, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.9092198581560283, |
| "grad_norm": 4.2446160316467285, |
| "learning_rate": 4.745768321961676e-06, |
| "loss": 0.5352, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.909692671394799, |
| "grad_norm": 2.6517012119293213, |
| "learning_rate": 4.745494162646841e-06, |
| "loss": 0.6118, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.9101654846335697, |
| "grad_norm": 2.774900197982788, |
| "learning_rate": 4.7452198635149304e-06, |
| "loss": 0.572, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.9106382978723404, |
| "grad_norm": 3.0133683681488037, |
| "learning_rate": 4.744945424583024e-06, |
| "loss": 0.5897, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.9111111111111111, |
| "grad_norm": 2.7344839572906494, |
| "learning_rate": 4.744670845868211e-06, |
| "loss": 0.6207, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.9115839243498818, |
| "grad_norm": 2.636578321456909, |
| "learning_rate": 4.744396127387586e-06, |
| "loss": 0.6687, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.9120567375886525, |
| "grad_norm": 2.8663458824157715, |
| "learning_rate": 4.744121269158255e-06, |
| "loss": 0.5002, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.9125295508274232, |
| "grad_norm": 2.661079168319702, |
| "learning_rate": 4.743846271197333e-06, |
| "loss": 0.5848, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.9130023640661938, |
| "grad_norm": 2.881256341934204, |
| "learning_rate": 4.743571133521943e-06, |
| "loss": 0.5911, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.9134751773049645, |
| "grad_norm": 2.5540573596954346, |
| "learning_rate": 4.743295856149217e-06, |
| "loss": 0.5647, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.9139479905437352, |
| "grad_norm": 2.7060387134552, |
| "learning_rate": 4.743020439096293e-06, |
| "loss": 0.6267, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.9144208037825059, |
| "grad_norm": 2.694481372833252, |
| "learning_rate": 4.742744882380323e-06, |
| "loss": 0.6283, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.9148936170212766, |
| "grad_norm": 2.711555242538452, |
| "learning_rate": 4.7424691860184625e-06, |
| "loss": 0.5784, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.9153664302600473, |
| "grad_norm": 2.9077224731445312, |
| "learning_rate": 4.742193350027879e-06, |
| "loss": 0.5948, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.915839243498818, |
| "grad_norm": 2.9824187755584717, |
| "learning_rate": 4.7419173744257476e-06, |
| "loss": 0.6115, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.9163120567375886, |
| "grad_norm": 2.5127830505371094, |
| "learning_rate": 4.7416412592292515e-06, |
| "loss": 0.5803, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.9167848699763593, |
| "grad_norm": 3.1307175159454346, |
| "learning_rate": 4.741365004455583e-06, |
| "loss": 0.5657, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.91725768321513, |
| "grad_norm": 2.8205273151397705, |
| "learning_rate": 4.741088610121944e-06, |
| "loss": 0.6145, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.9177304964539007, |
| "grad_norm": 2.6119720935821533, |
| "learning_rate": 4.7408120762455444e-06, |
| "loss": 0.6058, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.9182033096926714, |
| "grad_norm": 2.421276092529297, |
| "learning_rate": 4.7405354028436025e-06, |
| "loss": 0.5973, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.9186761229314421, |
| "grad_norm": 2.9846808910369873, |
| "learning_rate": 4.740258589933346e-06, |
| "loss": 0.6892, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.9191489361702128, |
| "grad_norm": 2.6899871826171875, |
| "learning_rate": 4.739981637532009e-06, |
| "loss": 0.5705, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.9196217494089834, |
| "grad_norm": 2.8636131286621094, |
| "learning_rate": 4.739704545656839e-06, |
| "loss": 0.5775, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.9200945626477541, |
| "grad_norm": 2.7659449577331543, |
| "learning_rate": 4.739427314325087e-06, |
| "loss": 0.5823, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.9205673758865248, |
| "grad_norm": 4.71295166015625, |
| "learning_rate": 4.739149943554016e-06, |
| "loss": 0.5601, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.9210401891252955, |
| "grad_norm": 2.642636775970459, |
| "learning_rate": 4.738872433360896e-06, |
| "loss": 0.5278, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.9215130023640662, |
| "grad_norm": 2.4658217430114746, |
| "learning_rate": 4.7385947837630065e-06, |
| "loss": 0.6392, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.9219858156028369, |
| "grad_norm": 2.851602792739868, |
| "learning_rate": 4.738316994777636e-06, |
| "loss": 0.6164, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.9224586288416076, |
| "grad_norm": 2.394226551055908, |
| "learning_rate": 4.738039066422081e-06, |
| "loss": 0.5556, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.9229314420803783, |
| "grad_norm": 2.7985100746154785, |
| "learning_rate": 4.737760998713647e-06, |
| "loss": 0.5799, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.9234042553191489, |
| "grad_norm": 2.5974674224853516, |
| "learning_rate": 4.737482791669648e-06, |
| "loss": 0.6984, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.9238770685579196, |
| "grad_norm": 2.707636594772339, |
| "learning_rate": 4.737204445307406e-06, |
| "loss": 0.5548, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.9243498817966903, |
| "grad_norm": 2.7882707118988037, |
| "learning_rate": 4.736925959644254e-06, |
| "loss": 0.6026, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.924822695035461, |
| "grad_norm": 2.474482774734497, |
| "learning_rate": 4.7366473346975304e-06, |
| "loss": 0.5832, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.9252955082742317, |
| "grad_norm": 2.6196324825286865, |
| "learning_rate": 4.736368570484585e-06, |
| "loss": 0.5861, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.9257683215130024, |
| "grad_norm": 2.826864004135132, |
| "learning_rate": 4.736089667022775e-06, |
| "loss": 0.6173, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.926241134751773, |
| "grad_norm": 2.414473056793213, |
| "learning_rate": 4.735810624329466e-06, |
| "loss": 0.5753, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.9267139479905437, |
| "grad_norm": 2.8037970066070557, |
| "learning_rate": 4.7355314424220335e-06, |
| "loss": 0.6207, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.9271867612293144, |
| "grad_norm": 2.645458698272705, |
| "learning_rate": 4.735252121317861e-06, |
| "loss": 0.5959, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.9276595744680851, |
| "grad_norm": 2.7983884811401367, |
| "learning_rate": 4.734972661034339e-06, |
| "loss": 0.5696, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.9281323877068558, |
| "grad_norm": 3.0568997859954834, |
| "learning_rate": 4.73469306158887e-06, |
| "loss": 0.6194, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.9286052009456265, |
| "grad_norm": 2.7205135822296143, |
| "learning_rate": 4.734413322998863e-06, |
| "loss": 0.5292, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.9290780141843972, |
| "grad_norm": 3.3168489933013916, |
| "learning_rate": 4.734133445281735e-06, |
| "loss": 0.5654, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.9295508274231679, |
| "grad_norm": 3.0095653533935547, |
| "learning_rate": 4.733853428454916e-06, |
| "loss": 0.6508, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.9300236406619385, |
| "grad_norm": 2.7726712226867676, |
| "learning_rate": 4.733573272535838e-06, |
| "loss": 0.644, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.9304964539007092, |
| "grad_norm": 2.474397659301758, |
| "learning_rate": 4.7332929775419456e-06, |
| "loss": 0.5479, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.9309692671394799, |
| "grad_norm": 2.4518635272979736, |
| "learning_rate": 4.733012543490693e-06, |
| "loss": 0.6, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.9314420803782506, |
| "grad_norm": 2.9292192459106445, |
| "learning_rate": 4.73273197039954e-06, |
| "loss": 0.6647, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.9319148936170213, |
| "grad_norm": 2.425004720687866, |
| "learning_rate": 4.732451258285958e-06, |
| "loss": 0.6338, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.932387706855792, |
| "grad_norm": 2.904479503631592, |
| "learning_rate": 4.7321704071674255e-06, |
| "loss": 0.5923, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.9328605200945627, |
| "grad_norm": 2.477085590362549, |
| "learning_rate": 4.731889417061428e-06, |
| "loss": 0.5984, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.9333333333333333, |
| "grad_norm": 2.585240364074707, |
| "learning_rate": 4.731608287985465e-06, |
| "loss": 0.558, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.933806146572104, |
| "grad_norm": 2.658714532852173, |
| "learning_rate": 4.731327019957039e-06, |
| "loss": 0.5567, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.9342789598108747, |
| "grad_norm": 2.7593026161193848, |
| "learning_rate": 4.731045612993662e-06, |
| "loss": 0.5772, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.9347517730496454, |
| "grad_norm": 2.4386026859283447, |
| "learning_rate": 4.7307640671128585e-06, |
| "loss": 0.6199, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.9352245862884161, |
| "grad_norm": 2.681910514831543, |
| "learning_rate": 4.730482382332158e-06, |
| "loss": 0.5971, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.9356973995271868, |
| "grad_norm": 3.7593860626220703, |
| "learning_rate": 4.7302005586691e-06, |
| "loss": 0.6346, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.9361702127659575, |
| "grad_norm": 2.5789096355438232, |
| "learning_rate": 4.729918596141232e-06, |
| "loss": 0.5684, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.9366430260047282, |
| "grad_norm": 3.0607335567474365, |
| "learning_rate": 4.729636494766111e-06, |
| "loss": 0.6223, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.9371158392434988, |
| "grad_norm": 2.906643867492676, |
| "learning_rate": 4.729354254561303e-06, |
| "loss": 0.6513, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.9375886524822695, |
| "grad_norm": 3.192430019378662, |
| "learning_rate": 4.7290718755443795e-06, |
| "loss": 0.5095, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.9380614657210402, |
| "grad_norm": 2.661536931991577, |
| "learning_rate": 4.7287893577329255e-06, |
| "loss": 0.5525, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.9385342789598109, |
| "grad_norm": 2.8436734676361084, |
| "learning_rate": 4.728506701144531e-06, |
| "loss": 0.6323, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.9390070921985816, |
| "grad_norm": 2.75544810295105, |
| "learning_rate": 4.728223905796796e-06, |
| "loss": 0.6018, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.9394799054373523, |
| "grad_norm": 3.0652759075164795, |
| "learning_rate": 4.727940971707329e-06, |
| "loss": 0.62, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.939952718676123, |
| "grad_norm": 2.802567720413208, |
| "learning_rate": 4.727657898893747e-06, |
| "loss": 0.5809, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.9404255319148936, |
| "grad_norm": 2.6208512783050537, |
| "learning_rate": 4.7273746873736745e-06, |
| "loss": 0.5762, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.9408983451536643, |
| "grad_norm": 2.5901873111724854, |
| "learning_rate": 4.727091337164748e-06, |
| "loss": 0.6111, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.941371158392435, |
| "grad_norm": 3.002347707748413, |
| "learning_rate": 4.726807848284609e-06, |
| "loss": 0.6419, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.9418439716312057, |
| "grad_norm": 2.522151470184326, |
| "learning_rate": 4.72652422075091e-06, |
| "loss": 0.642, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.9423167848699764, |
| "grad_norm": 2.5571532249450684, |
| "learning_rate": 4.726240454581311e-06, |
| "loss": 0.5729, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.9427895981087471, |
| "grad_norm": 2.7704918384552, |
| "learning_rate": 4.72595654979348e-06, |
| "loss": 0.6816, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.9432624113475178, |
| "grad_norm": 2.517040491104126, |
| "learning_rate": 4.7256725064050955e-06, |
| "loss": 0.5782, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.9437352245862884, |
| "grad_norm": 2.613955020904541, |
| "learning_rate": 4.725388324433843e-06, |
| "loss": 0.6291, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.9442080378250591, |
| "grad_norm": 2.848891258239746, |
| "learning_rate": 4.725104003897418e-06, |
| "loss": 0.6544, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.9446808510638298, |
| "grad_norm": 3.0162429809570312, |
| "learning_rate": 4.724819544813523e-06, |
| "loss": 0.6301, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.9451536643026005, |
| "grad_norm": 2.613614559173584, |
| "learning_rate": 4.72453494719987e-06, |
| "loss": 0.5829, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.9456264775413712, |
| "grad_norm": 2.4838767051696777, |
| "learning_rate": 4.724250211074182e-06, |
| "loss": 0.6042, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.9460992907801419, |
| "grad_norm": 2.526470899581909, |
| "learning_rate": 4.723965336454185e-06, |
| "loss": 0.6167, |
| "step": 2001 |
| }, |
| { |
| "epoch": 0.9465721040189126, |
| "grad_norm": 2.504506826400757, |
| "learning_rate": 4.723680323357618e-06, |
| "loss": 0.6061, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.9470449172576832, |
| "grad_norm": 3.0547544956207275, |
| "learning_rate": 4.723395171802228e-06, |
| "loss": 0.6619, |
| "step": 2003 |
| }, |
| { |
| "epoch": 0.9475177304964539, |
| "grad_norm": 2.8692407608032227, |
| "learning_rate": 4.723109881805771e-06, |
| "loss": 0.5985, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.9479905437352246, |
| "grad_norm": 2.7929654121398926, |
| "learning_rate": 4.7228244533860094e-06, |
| "loss": 0.5869, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.9484633569739953, |
| "grad_norm": 2.764869451522827, |
| "learning_rate": 4.7225388865607146e-06, |
| "loss": 0.6288, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.948936170212766, |
| "grad_norm": 2.7656404972076416, |
| "learning_rate": 4.722253181347671e-06, |
| "loss": 0.5831, |
| "step": 2007 |
| }, |
| { |
| "epoch": 0.9494089834515367, |
| "grad_norm": 2.6698336601257324, |
| "learning_rate": 4.7219673377646635e-06, |
| "loss": 0.6087, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.9498817966903074, |
| "grad_norm": 2.524935722351074, |
| "learning_rate": 4.7216813558294946e-06, |
| "loss": 0.5675, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.950354609929078, |
| "grad_norm": 2.5998785495758057, |
| "learning_rate": 4.721395235559969e-06, |
| "loss": 0.5667, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.9508274231678487, |
| "grad_norm": 2.758021354675293, |
| "learning_rate": 4.721108976973902e-06, |
| "loss": 0.4931, |
| "step": 2011 |
| }, |
| { |
| "epoch": 0.9513002364066194, |
| "grad_norm": 2.767695903778076, |
| "learning_rate": 4.72082258008912e-06, |
| "loss": 0.5778, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.9517730496453901, |
| "grad_norm": 2.982314348220825, |
| "learning_rate": 4.720536044923453e-06, |
| "loss": 0.6096, |
| "step": 2013 |
| }, |
| { |
| "epoch": 0.9522458628841608, |
| "grad_norm": 2.7608799934387207, |
| "learning_rate": 4.720249371494743e-06, |
| "loss": 0.6242, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.9527186761229315, |
| "grad_norm": 2.60054349899292, |
| "learning_rate": 4.71996255982084e-06, |
| "loss": 0.6249, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.9531914893617022, |
| "grad_norm": 2.654355764389038, |
| "learning_rate": 4.719675609919603e-06, |
| "loss": 0.6327, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.9536643026004729, |
| "grad_norm": 2.589404582977295, |
| "learning_rate": 4.719388521808899e-06, |
| "loss": 0.6357, |
| "step": 2017 |
| }, |
| { |
| "epoch": 0.9541371158392435, |
| "grad_norm": 2.8016581535339355, |
| "learning_rate": 4.719101295506603e-06, |
| "loss": 0.5901, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.9546099290780142, |
| "grad_norm": 3.1408045291900635, |
| "learning_rate": 4.7188139310306e-06, |
| "loss": 0.598, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.9550827423167849, |
| "grad_norm": 2.7432665824890137, |
| "learning_rate": 4.718526428398783e-06, |
| "loss": 0.5508, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.9555555555555556, |
| "grad_norm": 2.947800874710083, |
| "learning_rate": 4.718238787629053e-06, |
| "loss": 0.6439, |
| "step": 2021 |
| }, |
| { |
| "epoch": 0.9560283687943263, |
| "grad_norm": 2.50828218460083, |
| "learning_rate": 4.71795100873932e-06, |
| "loss": 0.5441, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.956501182033097, |
| "grad_norm": 2.8558974266052246, |
| "learning_rate": 4.717663091747503e-06, |
| "loss": 0.5416, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.9569739952718677, |
| "grad_norm": 2.4803316593170166, |
| "learning_rate": 4.71737503667153e-06, |
| "loss": 0.5317, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.9574468085106383, |
| "grad_norm": 4.36754035949707, |
| "learning_rate": 4.717086843529336e-06, |
| "loss": 0.5808, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.957919621749409, |
| "grad_norm": 2.730185031890869, |
| "learning_rate": 4.7167985123388665e-06, |
| "loss": 0.5257, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.9583924349881797, |
| "grad_norm": 2.8136069774627686, |
| "learning_rate": 4.716510043118074e-06, |
| "loss": 0.5836, |
| "step": 2027 |
| }, |
| { |
| "epoch": 0.9588652482269504, |
| "grad_norm": 2.793975353240967, |
| "learning_rate": 4.71622143588492e-06, |
| "loss": 0.5706, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.9593380614657211, |
| "grad_norm": 2.3883821964263916, |
| "learning_rate": 4.7159326906573745e-06, |
| "loss": 0.5291, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.9598108747044918, |
| "grad_norm": 2.6135976314544678, |
| "learning_rate": 4.715643807453417e-06, |
| "loss": 0.6199, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.9602836879432625, |
| "grad_norm": 2.6245670318603516, |
| "learning_rate": 4.715354786291035e-06, |
| "loss": 0.5585, |
| "step": 2031 |
| }, |
| { |
| "epoch": 0.9607565011820332, |
| "grad_norm": 2.7870967388153076, |
| "learning_rate": 4.715065627188225e-06, |
| "loss": 0.6196, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.9612293144208038, |
| "grad_norm": 2.6983911991119385, |
| "learning_rate": 4.714776330162991e-06, |
| "loss": 0.6424, |
| "step": 2033 |
| }, |
| { |
| "epoch": 0.9617021276595744, |
| "grad_norm": 2.3221919536590576, |
| "learning_rate": 4.7144868952333465e-06, |
| "loss": 0.568, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.9621749408983451, |
| "grad_norm": 2.9408178329467773, |
| "learning_rate": 4.714197322417314e-06, |
| "loss": 0.6175, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.9626477541371158, |
| "grad_norm": 2.404057264328003, |
| "learning_rate": 4.713907611732921e-06, |
| "loss": 0.4943, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.9631205673758865, |
| "grad_norm": 3.547607660293579, |
| "learning_rate": 4.71361776319821e-06, |
| "loss": 0.5488, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.9635933806146572, |
| "grad_norm": 2.679614543914795, |
| "learning_rate": 4.713327776831227e-06, |
| "loss": 0.6234, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.9640661938534278, |
| "grad_norm": 2.526914119720459, |
| "learning_rate": 4.7130376526500286e-06, |
| "loss": 0.5891, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.9645390070921985, |
| "grad_norm": 2.6953470706939697, |
| "learning_rate": 4.71274739067268e-06, |
| "loss": 0.69, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.9650118203309692, |
| "grad_norm": 2.546660900115967, |
| "learning_rate": 4.712456990917254e-06, |
| "loss": 0.6185, |
| "step": 2041 |
| }, |
| { |
| "epoch": 0.9654846335697399, |
| "grad_norm": 3.3920490741729736, |
| "learning_rate": 4.712166453401832e-06, |
| "loss": 0.587, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.9659574468085106, |
| "grad_norm": 2.5961573123931885, |
| "learning_rate": 4.711875778144504e-06, |
| "loss": 0.6105, |
| "step": 2043 |
| }, |
| { |
| "epoch": 0.9664302600472813, |
| "grad_norm": 2.5111498832702637, |
| "learning_rate": 4.711584965163372e-06, |
| "loss": 0.5533, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.966903073286052, |
| "grad_norm": 2.4878132343292236, |
| "learning_rate": 4.7112940144765405e-06, |
| "loss": 0.5604, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.9673758865248226, |
| "grad_norm": 2.5714077949523926, |
| "learning_rate": 4.711002926102128e-06, |
| "loss": 0.5794, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.9678486997635933, |
| "grad_norm": 2.7069091796875, |
| "learning_rate": 4.710711700058257e-06, |
| "loss": 0.594, |
| "step": 2047 |
| }, |
| { |
| "epoch": 0.968321513002364, |
| "grad_norm": 2.8104631900787354, |
| "learning_rate": 4.710420336363063e-06, |
| "loss": 0.6247, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.9687943262411347, |
| "grad_norm": 2.8464386463165283, |
| "learning_rate": 4.7101288350346865e-06, |
| "loss": 0.6162, |
| "step": 2049 |
| }, |
| { |
| "epoch": 0.9692671394799054, |
| "grad_norm": 2.7187976837158203, |
| "learning_rate": 4.709837196091279e-06, |
| "loss": 0.6109, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.9697399527186761, |
| "grad_norm": 2.556734085083008, |
| "learning_rate": 4.709545419550999e-06, |
| "loss": 0.6297, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.9702127659574468, |
| "grad_norm": 2.937195062637329, |
| "learning_rate": 4.709253505432014e-06, |
| "loss": 0.6862, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.9706855791962175, |
| "grad_norm": 2.792175531387329, |
| "learning_rate": 4.7089614537525015e-06, |
| "loss": 0.6105, |
| "step": 2053 |
| }, |
| { |
| "epoch": 0.9711583924349881, |
| "grad_norm": 2.625636100769043, |
| "learning_rate": 4.708669264530644e-06, |
| "loss": 0.5849, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.9716312056737588, |
| "grad_norm": 2.6752610206604004, |
| "learning_rate": 4.708376937784637e-06, |
| "loss": 0.5949, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.9721040189125295, |
| "grad_norm": 2.6072793006896973, |
| "learning_rate": 4.708084473532681e-06, |
| "loss": 0.5776, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.9725768321513002, |
| "grad_norm": 2.728632926940918, |
| "learning_rate": 4.707791871792988e-06, |
| "loss": 0.6352, |
| "step": 2057 |
| }, |
| { |
| "epoch": 0.9730496453900709, |
| "grad_norm": 2.5841758251190186, |
| "learning_rate": 4.707499132583775e-06, |
| "loss": 0.5488, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.9735224586288416, |
| "grad_norm": 2.8464293479919434, |
| "learning_rate": 4.707206255923271e-06, |
| "loss": 0.7051, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.9739952718676123, |
| "grad_norm": 2.547297239303589, |
| "learning_rate": 4.706913241829712e-06, |
| "loss": 0.5937, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.9744680851063829, |
| "grad_norm": 2.6572306156158447, |
| "learning_rate": 4.706620090321341e-06, |
| "loss": 0.6041, |
| "step": 2061 |
| }, |
| { |
| "epoch": 0.9749408983451536, |
| "grad_norm": 2.3262805938720703, |
| "learning_rate": 4.706326801416414e-06, |
| "loss": 0.5144, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.9754137115839243, |
| "grad_norm": 2.9693965911865234, |
| "learning_rate": 4.706033375133191e-06, |
| "loss": 0.551, |
| "step": 2063 |
| }, |
| { |
| "epoch": 0.975886524822695, |
| "grad_norm": 2.5993731021881104, |
| "learning_rate": 4.7057398114899435e-06, |
| "loss": 0.6143, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.9763593380614657, |
| "grad_norm": 2.453336477279663, |
| "learning_rate": 4.70544611050495e-06, |
| "loss": 0.6093, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.9768321513002364, |
| "grad_norm": 2.898629665374756, |
| "learning_rate": 4.705152272196497e-06, |
| "loss": 0.6007, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.9773049645390071, |
| "grad_norm": 2.7990612983703613, |
| "learning_rate": 4.7048582965828815e-06, |
| "loss": 0.6687, |
| "step": 2067 |
| }, |
| { |
| "epoch": 0.9777777777777777, |
| "grad_norm": 2.635284423828125, |
| "learning_rate": 4.704564183682408e-06, |
| "loss": 0.5564, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.9782505910165484, |
| "grad_norm": 3.014547109603882, |
| "learning_rate": 4.704269933513389e-06, |
| "loss": 0.6084, |
| "step": 2069 |
| }, |
| { |
| "epoch": 0.9787234042553191, |
| "grad_norm": 2.659357786178589, |
| "learning_rate": 4.703975546094147e-06, |
| "loss": 0.6031, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.9791962174940898, |
| "grad_norm": 2.326932668685913, |
| "learning_rate": 4.703681021443013e-06, |
| "loss": 0.5859, |
| "step": 2071 |
| }, |
| { |
| "epoch": 0.9796690307328605, |
| "grad_norm": 2.958803653717041, |
| "learning_rate": 4.7033863595783235e-06, |
| "loss": 0.5586, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.9801418439716312, |
| "grad_norm": 2.921386957168579, |
| "learning_rate": 4.703091560518427e-06, |
| "loss": 0.6126, |
| "step": 2073 |
| }, |
| { |
| "epoch": 0.9806146572104019, |
| "grad_norm": 2.6500775814056396, |
| "learning_rate": 4.702796624281679e-06, |
| "loss": 0.5678, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.9810874704491725, |
| "grad_norm": 2.7740228176116943, |
| "learning_rate": 4.702501550886445e-06, |
| "loss": 0.6067, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.9815602836879432, |
| "grad_norm": 2.3296213150024414, |
| "learning_rate": 4.702206340351096e-06, |
| "loss": 0.5247, |
| "step": 2076 |
| }, |
| { |
| "epoch": 0.9820330969267139, |
| "grad_norm": 2.748300790786743, |
| "learning_rate": 4.701910992694016e-06, |
| "loss": 0.5197, |
| "step": 2077 |
| }, |
| { |
| "epoch": 0.9825059101654846, |
| "grad_norm": 2.250985622406006, |
| "learning_rate": 4.7016155079335926e-06, |
| "loss": 0.5214, |
| "step": 2078 |
| }, |
| { |
| "epoch": 0.9829787234042553, |
| "grad_norm": 2.389845848083496, |
| "learning_rate": 4.701319886088226e-06, |
| "loss": 0.519, |
| "step": 2079 |
| }, |
| { |
| "epoch": 0.983451536643026, |
| "grad_norm": 2.818220853805542, |
| "learning_rate": 4.701024127176322e-06, |
| "loss": 0.607, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.9839243498817967, |
| "grad_norm": 3.4058034420013428, |
| "learning_rate": 4.700728231216297e-06, |
| "loss": 0.5711, |
| "step": 2081 |
| }, |
| { |
| "epoch": 0.9843971631205674, |
| "grad_norm": 2.5297787189483643, |
| "learning_rate": 4.700432198226575e-06, |
| "loss": 0.5979, |
| "step": 2082 |
| }, |
| { |
| "epoch": 0.984869976359338, |
| "grad_norm": 3.0548105239868164, |
| "learning_rate": 4.7001360282255885e-06, |
| "loss": 0.6041, |
| "step": 2083 |
| }, |
| { |
| "epoch": 0.9853427895981087, |
| "grad_norm": 2.8983733654022217, |
| "learning_rate": 4.699839721231779e-06, |
| "loss": 0.5926, |
| "step": 2084 |
| }, |
| { |
| "epoch": 0.9858156028368794, |
| "grad_norm": 3.2717764377593994, |
| "learning_rate": 4.699543277263596e-06, |
| "loss": 0.6477, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.9862884160756501, |
| "grad_norm": 3.03729248046875, |
| "learning_rate": 4.699246696339497e-06, |
| "loss": 0.6786, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.9867612293144208, |
| "grad_norm": 2.852301597595215, |
| "learning_rate": 4.698949978477951e-06, |
| "loss": 0.6565, |
| "step": 2087 |
| }, |
| { |
| "epoch": 0.9872340425531915, |
| "grad_norm": 2.843485116958618, |
| "learning_rate": 4.698653123697431e-06, |
| "loss": 0.6627, |
| "step": 2088 |
| }, |
| { |
| "epoch": 0.9877068557919622, |
| "grad_norm": 2.6315064430236816, |
| "learning_rate": 4.698356132016423e-06, |
| "loss": 0.6577, |
| "step": 2089 |
| }, |
| { |
| "epoch": 0.9881796690307328, |
| "grad_norm": 2.7482151985168457, |
| "learning_rate": 4.698059003453417e-06, |
| "loss": 0.5514, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.9886524822695035, |
| "grad_norm": 2.826673746109009, |
| "learning_rate": 4.6977617380269145e-06, |
| "loss": 0.565, |
| "step": 2091 |
| }, |
| { |
| "epoch": 0.9891252955082742, |
| "grad_norm": 3.0273752212524414, |
| "learning_rate": 4.697464335755427e-06, |
| "loss": 0.6331, |
| "step": 2092 |
| }, |
| { |
| "epoch": 0.9895981087470449, |
| "grad_norm": 2.7551653385162354, |
| "learning_rate": 4.6971667966574695e-06, |
| "loss": 0.6486, |
| "step": 2093 |
| }, |
| { |
| "epoch": 0.9900709219858156, |
| "grad_norm": 2.656299114227295, |
| "learning_rate": 4.696869120751571e-06, |
| "loss": 0.6562, |
| "step": 2094 |
| }, |
| { |
| "epoch": 0.9905437352245863, |
| "grad_norm": 2.785322904586792, |
| "learning_rate": 4.696571308056265e-06, |
| "loss": 0.5892, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.991016548463357, |
| "grad_norm": 2.9334635734558105, |
| "learning_rate": 4.696273358590095e-06, |
| "loss": 0.6346, |
| "step": 2096 |
| }, |
| { |
| "epoch": 0.9914893617021276, |
| "grad_norm": 2.7944300174713135, |
| "learning_rate": 4.695975272371613e-06, |
| "loss": 0.5859, |
| "step": 2097 |
| }, |
| { |
| "epoch": 0.9919621749408983, |
| "grad_norm": 2.5416972637176514, |
| "learning_rate": 4.695677049419381e-06, |
| "loss": 0.5658, |
| "step": 2098 |
| }, |
| { |
| "epoch": 0.992434988179669, |
| "grad_norm": 2.4056856632232666, |
| "learning_rate": 4.695378689751966e-06, |
| "loss": 0.5121, |
| "step": 2099 |
| }, |
| { |
| "epoch": 0.9929078014184397, |
| "grad_norm": 2.614548683166504, |
| "learning_rate": 4.695080193387948e-06, |
| "loss": 0.5961, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.9933806146572104, |
| "grad_norm": 2.8966517448425293, |
| "learning_rate": 4.69478156034591e-06, |
| "loss": 0.5985, |
| "step": 2101 |
| }, |
| { |
| "epoch": 0.9938534278959811, |
| "grad_norm": 2.9514098167419434, |
| "learning_rate": 4.694482790644448e-06, |
| "loss": 0.5677, |
| "step": 2102 |
| }, |
| { |
| "epoch": 0.9943262411347518, |
| "grad_norm": 2.4326791763305664, |
| "learning_rate": 4.694183884302165e-06, |
| "loss": 0.5698, |
| "step": 2103 |
| }, |
| { |
| "epoch": 0.9947990543735225, |
| "grad_norm": 2.9242892265319824, |
| "learning_rate": 4.6938848413376735e-06, |
| "loss": 0.6245, |
| "step": 2104 |
| }, |
| { |
| "epoch": 0.9952718676122931, |
| "grad_norm": 2.9134104251861572, |
| "learning_rate": 4.693585661769593e-06, |
| "loss": 0.6164, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.9957446808510638, |
| "grad_norm": 2.472564458847046, |
| "learning_rate": 4.693286345616551e-06, |
| "loss": 0.5616, |
| "step": 2106 |
| }, |
| { |
| "epoch": 0.9962174940898345, |
| "grad_norm": 3.2456448078155518, |
| "learning_rate": 4.692986892897186e-06, |
| "loss": 0.6977, |
| "step": 2107 |
| }, |
| { |
| "epoch": 0.9966903073286052, |
| "grad_norm": 3.4032769203186035, |
| "learning_rate": 4.692687303630143e-06, |
| "loss": 0.643, |
| "step": 2108 |
| }, |
| { |
| "epoch": 0.9971631205673759, |
| "grad_norm": 2.722200870513916, |
| "learning_rate": 4.692387577834076e-06, |
| "loss": 0.5873, |
| "step": 2109 |
| }, |
| { |
| "epoch": 0.9976359338061466, |
| "grad_norm": 2.687532663345337, |
| "learning_rate": 4.692087715527648e-06, |
| "loss": 0.5423, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.9981087470449173, |
| "grad_norm": 2.578613042831421, |
| "learning_rate": 4.6917877167295305e-06, |
| "loss": 0.5689, |
| "step": 2111 |
| }, |
| { |
| "epoch": 0.9985815602836879, |
| "grad_norm": 3.1806094646453857, |
| "learning_rate": 4.691487581458402e-06, |
| "loss": 0.6133, |
| "step": 2112 |
| }, |
| { |
| "epoch": 0.9990543735224586, |
| "grad_norm": 2.4449520111083984, |
| "learning_rate": 4.691187309732952e-06, |
| "loss": 0.5841, |
| "step": 2113 |
| }, |
| { |
| "epoch": 0.9995271867612293, |
| "grad_norm": 2.908749580383301, |
| "learning_rate": 4.690886901571875e-06, |
| "loss": 0.534, |
| "step": 2114 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 4.019968032836914, |
| "learning_rate": 4.6905863569938785e-06, |
| "loss": 0.596, |
| "step": 2115 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 12690, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 2115, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.341936104473887e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
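| |
| The block above is the complete trainer state recorded at step 2115, i.e. the end of the first of the six configured epochs (save_steps = 2115, max_steps = 12690). As a minimal sketch of how such a state file might be inspected offline, the snippet below loads the JSON and summarizes the logged loss curve and overall progress. The path "checkpoint-2115/trainer_state.json" is an assumption about where the checkpoint was written; it is not recorded in the state itself. |
| |
| # Minimal sketch (assumed file location): summarize this trainer state. |
| import json |
| from statistics import mean |
| |
| with open("checkpoint-2115/trainer_state.json") as f: |
|     state = json.load(f) |
| |
| # Keep only entries that carry a training loss (defensive: summary or eval |
| # entries, if present, may lack one). |
| logs = [e for e in state["log_history"] if "loss" in e] |
| print(f"steps logged: {len(logs)}") |
| print(f"first loss: {logs[0]['loss']:.4f} (step {logs[0]['step']})") |
| print(f"last loss:  {logs[-1]['loss']:.4f} (step {logs[-1]['step']})") |
| print(f"mean loss, last 100 steps: {mean(e['loss'] for e in logs[-100:]):.4f}") |
| |
| # Progress through the scheduled run: 2115 / 12690 = 1/6 of max_steps. |
| print(f"progress: {state['global_step']}/{state['max_steps']} steps " |
|       f"({state['global_step'] / state['max_steps']:.1%})") |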