| { |
| "best_global_step": 300, |
| "best_metric": 0.2121591567993164, |
| "best_model_checkpoint": "saves_multiple/prefix-tuning/llama-3-8b-instruct/train_cb_789_1760637866/checkpoint-300", |
| "epoch": 20.0, |
| "eval_steps": 100, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.1, |
| "grad_norm": 126.67435455322266, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 10.0656, |
| "num_input_tokens_seen": 3360, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 103.99217987060547, |
| "learning_rate": 9.000000000000001e-07, |
| "loss": 9.3305, |
| "num_input_tokens_seen": 6240, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 119.90917205810547, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 8.3419, |
| "num_input_tokens_seen": 9472, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 145.24908447265625, |
| "learning_rate": 1.9000000000000002e-06, |
| "loss": 7.5693, |
| "num_input_tokens_seen": 12448, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 111.7554702758789, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 6.6857, |
| "num_input_tokens_seen": 15584, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 100.42864227294922, |
| "learning_rate": 2.9e-06, |
| "loss": 4.8829, |
| "num_input_tokens_seen": 18048, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 76.21986389160156, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 3.8694, |
| "num_input_tokens_seen": 21248, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 63.294944763183594, |
| "learning_rate": 3.900000000000001e-06, |
| "loss": 4.3727, |
| "num_input_tokens_seen": 24992, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 43.78547668457031, |
| "learning_rate": 4.4e-06, |
| "loss": 2.2321, |
| "num_input_tokens_seen": 28000, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 47.35298156738281, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 1.6102, |
| "num_input_tokens_seen": 30976, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 45.79646301269531, |
| "learning_rate": 5.400000000000001e-06, |
| "loss": 0.9793, |
| "num_input_tokens_seen": 34432, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 49.426795959472656, |
| "learning_rate": 5.9e-06, |
| "loss": 0.5423, |
| "num_input_tokens_seen": 37024, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 30.20770835876465, |
| "learning_rate": 6.4000000000000006e-06, |
| "loss": 0.4926, |
| "num_input_tokens_seen": 39840, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 13.208630561828613, |
| "learning_rate": 6.9e-06, |
| "loss": 0.2723, |
| "num_input_tokens_seen": 42592, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 35.72091293334961, |
| "learning_rate": 7.4e-06, |
| "loss": 0.2768, |
| "num_input_tokens_seen": 45664, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 37.15167999267578, |
| "learning_rate": 7.9e-06, |
| "loss": 0.2719, |
| "num_input_tokens_seen": 49568, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 19.817626953125, |
| "learning_rate": 8.400000000000001e-06, |
| "loss": 0.3764, |
| "num_input_tokens_seen": 52416, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 55.59193420410156, |
| "learning_rate": 8.900000000000001e-06, |
| "loss": 0.3647, |
| "num_input_tokens_seen": 55616, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 26.232587814331055, |
| "learning_rate": 9.4e-06, |
| "loss": 0.1932, |
| "num_input_tokens_seen": 59232, |
| "step": 95 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 33.89227294921875, |
| "learning_rate": 9.9e-06, |
| "loss": 0.4162, |
| "num_input_tokens_seen": 62752, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.23473547399044037, |
| "eval_runtime": 1.0978, |
| "eval_samples_per_second": 45.547, |
| "eval_steps_per_second": 11.842, |
| "num_input_tokens_seen": 62752, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.1, |
| "grad_norm": 33.454803466796875, |
| "learning_rate": 9.999512620046523e-06, |
| "loss": 0.2281, |
| "num_input_tokens_seen": 65696, |
| "step": 105 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 44.9728889465332, |
| "learning_rate": 9.997532801828659e-06, |
| "loss": 0.357, |
| "num_input_tokens_seen": 69312, |
| "step": 110 |
| }, |
| { |
| "epoch": 2.3, |
| "grad_norm": 27.419870376586914, |
| "learning_rate": 9.994030686707171e-06, |
| "loss": 0.3221, |
| "num_input_tokens_seen": 72288, |
| "step": 115 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 24.098169326782227, |
| "learning_rate": 9.989007341460251e-06, |
| "loss": 0.2875, |
| "num_input_tokens_seen": 75232, |
| "step": 120 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 52.44646072387695, |
| "learning_rate": 9.982464296247523e-06, |
| "loss": 0.3225, |
| "num_input_tokens_seen": 78912, |
| "step": 125 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 18.374570846557617, |
| "learning_rate": 9.974403544143942e-06, |
| "loss": 0.274, |
| "num_input_tokens_seen": 81760, |
| "step": 130 |
| }, |
| { |
| "epoch": 2.7, |
| "grad_norm": 10.920315742492676, |
| "learning_rate": 9.964827540532685e-06, |
| "loss": 0.1883, |
| "num_input_tokens_seen": 85152, |
| "step": 135 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 18.052717208862305, |
| "learning_rate": 9.953739202357219e-06, |
| "loss": 0.1228, |
| "num_input_tokens_seen": 87776, |
| "step": 140 |
| }, |
| { |
| "epoch": 2.9, |
| "grad_norm": 43.478153228759766, |
| "learning_rate": 9.941141907232766e-06, |
| "loss": 0.1745, |
| "num_input_tokens_seen": 91392, |
| "step": 145 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 45.755645751953125, |
| "learning_rate": 9.927039492417452e-06, |
| "loss": 0.273, |
| "num_input_tokens_seen": 94656, |
| "step": 150 |
| }, |
| { |
| "epoch": 3.1, |
| "grad_norm": 79.13074493408203, |
| "learning_rate": 9.911436253643445e-06, |
| "loss": 0.3629, |
| "num_input_tokens_seen": 97856, |
| "step": 155 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 5.613999366760254, |
| "learning_rate": 9.894336943808426e-06, |
| "loss": 0.2236, |
| "num_input_tokens_seen": 101088, |
| "step": 160 |
| }, |
| { |
| "epoch": 3.3, |
| "grad_norm": 27.479028701782227, |
| "learning_rate": 9.875746771527817e-06, |
| "loss": 0.2376, |
| "num_input_tokens_seen": 103808, |
| "step": 165 |
| }, |
| { |
| "epoch": 3.4, |
| "grad_norm": 40.90334701538086, |
| "learning_rate": 9.85567139954818e-06, |
| "loss": 0.1237, |
| "num_input_tokens_seen": 106784, |
| "step": 170 |
| }, |
| { |
| "epoch": 3.5, |
| "grad_norm": 16.67771339416504, |
| "learning_rate": 9.834116943022299e-06, |
| "loss": 0.2255, |
| "num_input_tokens_seen": 109376, |
| "step": 175 |
| }, |
| { |
| "epoch": 3.6, |
| "grad_norm": 28.74492645263672, |
| "learning_rate": 9.811089967646427e-06, |
| "loss": 0.3661, |
| "num_input_tokens_seen": 112736, |
| "step": 180 |
| }, |
| { |
| "epoch": 3.7, |
| "grad_norm": 30.045061111450195, |
| "learning_rate": 9.786597487660336e-06, |
| "loss": 0.2437, |
| "num_input_tokens_seen": 115936, |
| "step": 185 |
| }, |
| { |
| "epoch": 3.8, |
| "grad_norm": 26.008085250854492, |
| "learning_rate": 9.760646963710694e-06, |
| "loss": 0.3374, |
| "num_input_tokens_seen": 119520, |
| "step": 190 |
| }, |
| { |
| "epoch": 3.9, |
| "grad_norm": 31.215551376342773, |
| "learning_rate": 9.733246300578482e-06, |
| "loss": 0.2789, |
| "num_input_tokens_seen": 123264, |
| "step": 195 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 24.799978256225586, |
| "learning_rate": 9.704403844771128e-06, |
| "loss": 0.2069, |
| "num_input_tokens_seen": 126016, |
| "step": 200 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 0.3462370038032532, |
| "eval_runtime": 1.0864, |
| "eval_samples_per_second": 46.022, |
| "eval_steps_per_second": 11.966, |
| "num_input_tokens_seen": 126016, |
| "step": 200 |
| }, |
| { |
| "epoch": 4.1, |
| "grad_norm": 13.58299732208252, |
| "learning_rate": 9.674128381980073e-06, |
| "loss": 0.1503, |
| "num_input_tokens_seen": 129056, |
| "step": 205 |
| }, |
| { |
| "epoch": 4.2, |
| "grad_norm": 19.04677963256836, |
| "learning_rate": 9.642429134404568e-06, |
| "loss": 0.0995, |
| "num_input_tokens_seen": 132224, |
| "step": 210 |
| }, |
| { |
| "epoch": 4.3, |
| "grad_norm": 15.613679885864258, |
| "learning_rate": 9.609315757942504e-06, |
| "loss": 0.3273, |
| "num_input_tokens_seen": 135200, |
| "step": 215 |
| }, |
| { |
| "epoch": 4.4, |
| "grad_norm": 7.5095601081848145, |
| "learning_rate": 9.574798339249124e-06, |
| "loss": 0.2516, |
| "num_input_tokens_seen": 138624, |
| "step": 220 |
| }, |
| { |
| "epoch": 4.5, |
| "grad_norm": 18.163787841796875, |
| "learning_rate": 9.538887392664544e-06, |
| "loss": 0.1767, |
| "num_input_tokens_seen": 141344, |
| "step": 225 |
| }, |
| { |
| "epoch": 4.6, |
| "grad_norm": 22.580303192138672, |
| "learning_rate": 9.501593857010968e-06, |
| "loss": 0.2355, |
| "num_input_tokens_seen": 144416, |
| "step": 230 |
| }, |
| { |
| "epoch": 4.7, |
| "grad_norm": 30.586767196655273, |
| "learning_rate": 9.46292909226063e-06, |
| "loss": 0.2201, |
| "num_input_tokens_seen": 147008, |
| "step": 235 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 6.959929943084717, |
| "learning_rate": 9.42290487607542e-06, |
| "loss": 0.0765, |
| "num_input_tokens_seen": 150144, |
| "step": 240 |
| }, |
| { |
| "epoch": 4.9, |
| "grad_norm": 3.451066255569458, |
| "learning_rate": 9.381533400219319e-06, |
| "loss": 0.3708, |
| "num_input_tokens_seen": 153504, |
| "step": 245 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 32.64288330078125, |
| "learning_rate": 9.338827266844643e-06, |
| "loss": 0.2332, |
| "num_input_tokens_seen": 156800, |
| "step": 250 |
| }, |
| { |
| "epoch": 5.1, |
| "grad_norm": 27.317556381225586, |
| "learning_rate": 9.294799484653323e-06, |
| "loss": 0.1417, |
| "num_input_tokens_seen": 159712, |
| "step": 255 |
| }, |
| { |
| "epoch": 5.2, |
| "grad_norm": 3.4010379314422607, |
| "learning_rate": 9.24946346493432e-06, |
| "loss": 0.2104, |
| "num_input_tokens_seen": 162976, |
| "step": 260 |
| }, |
| { |
| "epoch": 5.3, |
| "grad_norm": 24.579574584960938, |
| "learning_rate": 9.202833017478421e-06, |
| "loss": 0.3122, |
| "num_input_tokens_seen": 166400, |
| "step": 265 |
| }, |
| { |
| "epoch": 5.4, |
| "grad_norm": 26.051284790039062, |
| "learning_rate": 9.154922346371641e-06, |
| "loss": 0.1487, |
| "num_input_tokens_seen": 169696, |
| "step": 270 |
| }, |
| { |
| "epoch": 5.5, |
| "grad_norm": 7.223366737365723, |
| "learning_rate": 9.10574604566852e-06, |
| "loss": 0.1785, |
| "num_input_tokens_seen": 172800, |
| "step": 275 |
| }, |
| { |
| "epoch": 5.6, |
| "grad_norm": 1.98892343044281, |
| "learning_rate": 9.055319094946633e-06, |
| "loss": 0.1335, |
| "num_input_tokens_seen": 175776, |
| "step": 280 |
| }, |
| { |
| "epoch": 5.7, |
| "grad_norm": 24.002784729003906, |
| "learning_rate": 9.003656854743667e-06, |
| "loss": 0.2928, |
| "num_input_tokens_seen": 178816, |
| "step": 285 |
| }, |
| { |
| "epoch": 5.8, |
| "grad_norm": 14.468860626220703, |
| "learning_rate": 8.950775061878453e-06, |
| "loss": 0.114, |
| "num_input_tokens_seen": 181952, |
| "step": 290 |
| }, |
| { |
| "epoch": 5.9, |
| "grad_norm": 6.480319976806641, |
| "learning_rate": 8.896689824657371e-06, |
| "loss": 0.078, |
| "num_input_tokens_seen": 184960, |
| "step": 295 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 1.0165597200393677, |
| "learning_rate": 8.841417617967618e-06, |
| "loss": 0.1345, |
| "num_input_tokens_seen": 188192, |
| "step": 300 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_loss": 0.2121591567993164, |
| "eval_runtime": 1.087, |
| "eval_samples_per_second": 45.999, |
| "eval_steps_per_second": 11.96, |
| "num_input_tokens_seen": 188192, |
| "step": 300 |
| }, |
| { |
| "epoch": 6.1, |
| "grad_norm": 7.664653301239014, |
| "learning_rate": 8.784975278258783e-06, |
| "loss": 0.0767, |
| "num_input_tokens_seen": 191520, |
| "step": 305 |
| }, |
| { |
| "epoch": 6.2, |
| "grad_norm": 69.77971649169922, |
| "learning_rate": 8.727379998414311e-06, |
| "loss": 0.2298, |
| "num_input_tokens_seen": 194176, |
| "step": 310 |
| }, |
| { |
| "epoch": 6.3, |
| "grad_norm": 30.993518829345703, |
| "learning_rate": 8.668649322514382e-06, |
| "loss": 0.1126, |
| "num_input_tokens_seen": 197376, |
| "step": 315 |
| }, |
| { |
| "epoch": 6.4, |
| "grad_norm": 14.220212936401367, |
| "learning_rate": 8.608801140491811e-06, |
| "loss": 0.1026, |
| "num_input_tokens_seen": 200448, |
| "step": 320 |
| }, |
| { |
| "epoch": 6.5, |
| "grad_norm": 24.607812881469727, |
| "learning_rate": 8.547853682682605e-06, |
| "loss": 0.1741, |
| "num_input_tokens_seen": 203360, |
| "step": 325 |
| }, |
| { |
| "epoch": 6.6, |
| "grad_norm": 23.7320613861084, |
| "learning_rate": 8.485825514272824e-06, |
| "loss": 0.1554, |
| "num_input_tokens_seen": 206496, |
| "step": 330 |
| }, |
| { |
| "epoch": 6.7, |
| "grad_norm": 2.555202007293701, |
| "learning_rate": 8.422735529643445e-06, |
| "loss": 0.0648, |
| "num_input_tokens_seen": 210112, |
| "step": 335 |
| }, |
| { |
| "epoch": 6.8, |
| "grad_norm": 19.803964614868164, |
| "learning_rate": 8.358602946614952e-06, |
| "loss": 0.154, |
| "num_input_tokens_seen": 212672, |
| "step": 340 |
| }, |
| { |
| "epoch": 6.9, |
| "grad_norm": 2.451209783554077, |
| "learning_rate": 8.293447300593402e-06, |
| "loss": 0.0796, |
| "num_input_tokens_seen": 215936, |
| "step": 345 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 1.0204869508743286, |
| "learning_rate": 8.227288438619754e-06, |
| "loss": 0.1656, |
| "num_input_tokens_seen": 218720, |
| "step": 350 |
| }, |
| { |
| "epoch": 7.1, |
| "grad_norm": 2.2546634674072266, |
| "learning_rate": 8.160146513324256e-06, |
| "loss": 0.0317, |
| "num_input_tokens_seen": 221888, |
| "step": 355 |
| }, |
| { |
| "epoch": 7.2, |
| "grad_norm": 1.181561827659607, |
| "learning_rate": 8.092041976787772e-06, |
| "loss": 0.1045, |
| "num_input_tokens_seen": 225408, |
| "step": 360 |
| }, |
| { |
| "epoch": 7.3, |
| "grad_norm": 0.22805003821849823, |
| "learning_rate": 8.022995574311876e-06, |
| "loss": 0.0727, |
| "num_input_tokens_seen": 228608, |
| "step": 365 |
| }, |
| { |
| "epoch": 7.4, |
| "grad_norm": 26.389677047729492, |
| "learning_rate": 7.953028338099628e-06, |
| "loss": 0.0234, |
| "num_input_tokens_seen": 232160, |
| "step": 370 |
| }, |
| { |
| "epoch": 7.5, |
| "grad_norm": 2.29594349861145, |
| "learning_rate": 7.882161580848966e-06, |
| "loss": 0.1214, |
| "num_input_tokens_seen": 235072, |
| "step": 375 |
| }, |
| { |
| "epoch": 7.6, |
| "grad_norm": 2.273941993713379, |
| "learning_rate": 7.810416889260653e-06, |
| "loss": 0.0632, |
| "num_input_tokens_seen": 237792, |
| "step": 380 |
| }, |
| { |
| "epoch": 7.7, |
| "grad_norm": 14.685662269592285, |
| "learning_rate": 7.737816117462752e-06, |
| "loss": 0.0644, |
| "num_input_tokens_seen": 240960, |
| "step": 385 |
| }, |
| { |
| "epoch": 7.8, |
| "grad_norm": 29.62261962890625, |
| "learning_rate": 7.66438138035365e-06, |
| "loss": 0.1763, |
| "num_input_tokens_seen": 244608, |
| "step": 390 |
| }, |
| { |
| "epoch": 7.9, |
| "grad_norm": 1.3242279291152954, |
| "learning_rate": 7.590135046865652e-06, |
| "loss": 0.0379, |
| "num_input_tokens_seen": 247296, |
| "step": 395 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 36.12474822998047, |
| "learning_rate": 7.515099733151177e-06, |
| "loss": 0.31, |
| "num_input_tokens_seen": 249984, |
| "step": 400 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_loss": 0.23022499680519104, |
| "eval_runtime": 1.0911, |
| "eval_samples_per_second": 45.826, |
| "eval_steps_per_second": 11.915, |
| "num_input_tokens_seen": 249984, |
| "step": 400 |
| }, |
| { |
| "epoch": 8.1, |
| "grad_norm": 33.88733673095703, |
| "learning_rate": 7.4392982956936644e-06, |
| "loss": 0.0672, |
| "num_input_tokens_seen": 252800, |
| "step": 405 |
| }, |
| { |
| "epoch": 8.2, |
| "grad_norm": 3.349085807800293, |
| "learning_rate": 7.362753824345271e-06, |
| "loss": 0.052, |
| "num_input_tokens_seen": 255488, |
| "step": 410 |
| }, |
| { |
| "epoch": 8.3, |
| "grad_norm": 34.323062896728516, |
| "learning_rate": 7.285489635293472e-06, |
| "loss": 0.0539, |
| "num_input_tokens_seen": 258656, |
| "step": 415 |
| }, |
| { |
| "epoch": 8.4, |
| "grad_norm": 32.22492980957031, |
| "learning_rate": 7.207529263958727e-06, |
| "loss": 0.1367, |
| "num_input_tokens_seen": 261984, |
| "step": 420 |
| }, |
| { |
| "epoch": 8.5, |
| "grad_norm": 5.826536178588867, |
| "learning_rate": 7.128896457825364e-06, |
| "loss": 0.0645, |
| "num_input_tokens_seen": 265152, |
| "step": 425 |
| }, |
| { |
| "epoch": 8.6, |
| "grad_norm": 13.194087982177734, |
| "learning_rate": 7.049615169207864e-06, |
| "loss": 0.0374, |
| "num_input_tokens_seen": 268000, |
| "step": 430 |
| }, |
| { |
| "epoch": 8.7, |
| "grad_norm": 71.0287857055664, |
| "learning_rate": 6.9697095479547564e-06, |
| "loss": 0.139, |
| "num_input_tokens_seen": 271808, |
| "step": 435 |
| }, |
| { |
| "epoch": 8.8, |
| "grad_norm": 1.0513635873794556, |
| "learning_rate": 6.889203934092337e-06, |
| "loss": 0.0079, |
| "num_input_tokens_seen": 275520, |
| "step": 440 |
| }, |
| { |
| "epoch": 8.9, |
| "grad_norm": 46.610313415527344, |
| "learning_rate": 6.808122850410461e-06, |
| "loss": 0.0963, |
| "num_input_tokens_seen": 278176, |
| "step": 445 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 0.6356768012046814, |
| "learning_rate": 6.7264909949926735e-06, |
| "loss": 0.0933, |
| "num_input_tokens_seen": 281248, |
| "step": 450 |
| }, |
| { |
| "epoch": 9.1, |
| "grad_norm": 0.4989936053752899, |
| "learning_rate": 6.644333233692917e-06, |
| "loss": 0.0743, |
| "num_input_tokens_seen": 284480, |
| "step": 455 |
| }, |
| { |
| "epoch": 9.2, |
| "grad_norm": 4.801666259765625, |
| "learning_rate": 6.561674592561164e-06, |
| "loss": 0.0085, |
| "num_input_tokens_seen": 287360, |
| "step": 460 |
| }, |
| { |
| "epoch": 9.3, |
| "grad_norm": 0.1441125124692917, |
| "learning_rate": 6.4785402502202345e-06, |
| "loss": 0.0492, |
| "num_input_tokens_seen": 290304, |
| "step": 465 |
| }, |
| { |
| "epoch": 9.4, |
| "grad_norm": 28.074064254760742, |
| "learning_rate": 6.3949555301961474e-06, |
| "loss": 0.0532, |
| "num_input_tokens_seen": 292640, |
| "step": 470 |
| }, |
| { |
| "epoch": 9.5, |
| "grad_norm": 1.0933701992034912, |
| "learning_rate": 6.310945893204324e-06, |
| "loss": 0.005, |
| "num_input_tokens_seen": 295840, |
| "step": 475 |
| }, |
| { |
| "epoch": 9.6, |
| "grad_norm": 0.2768193781375885, |
| "learning_rate": 6.2265369293940135e-06, |
| "loss": 0.0019, |
| "num_input_tokens_seen": 298720, |
| "step": 480 |
| }, |
| { |
| "epoch": 9.7, |
| "grad_norm": 0.11863156408071518, |
| "learning_rate": 6.141754350553279e-06, |
| "loss": 0.0065, |
| "num_input_tokens_seen": 301984, |
| "step": 485 |
| }, |
| { |
| "epoch": 9.8, |
| "grad_norm": 0.5300511717796326, |
| "learning_rate": 6.056623982276945e-06, |
| "loss": 0.048, |
| "num_input_tokens_seen": 305344, |
| "step": 490 |
| }, |
| { |
| "epoch": 9.9, |
| "grad_norm": 26.73280143737793, |
| "learning_rate": 5.97117175609986e-06, |
| "loss": 0.0699, |
| "num_input_tokens_seen": 308448, |
| "step": 495 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 24.945024490356445, |
| "learning_rate": 5.885423701597918e-06, |
| "loss": 0.0348, |
| "num_input_tokens_seen": 311904, |
| "step": 500 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_loss": 0.213058739900589, |
| "eval_runtime": 1.0877, |
| "eval_samples_per_second": 45.97, |
| "eval_steps_per_second": 11.952, |
| "num_input_tokens_seen": 311904, |
| "step": 500 |
| }, |
| { |
| "epoch": 10.1, |
| "grad_norm": 0.17195160686969757, |
| "learning_rate": 5.799405938459175e-06, |
| "loss": 0.0075, |
| "num_input_tokens_seen": 314752, |
| "step": 505 |
| }, |
| { |
| "epoch": 10.2, |
| "grad_norm": 0.021077385172247887, |
| "learning_rate": 5.7131446685275595e-06, |
| "loss": 0.0091, |
| "num_input_tokens_seen": 318240, |
| "step": 510 |
| }, |
| { |
| "epoch": 10.3, |
| "grad_norm": 13.625564575195312, |
| "learning_rate": 5.626666167821522e-06, |
| "loss": 0.0086, |
| "num_input_tokens_seen": 321184, |
| "step": 515 |
| }, |
| { |
| "epoch": 10.4, |
| "grad_norm": 0.19901487231254578, |
| "learning_rate": 5.539996778530114e-06, |
| "loss": 0.0047, |
| "num_input_tokens_seen": 323840, |
| "step": 520 |
| }, |
| { |
| "epoch": 10.5, |
| "grad_norm": 18.738676071166992, |
| "learning_rate": 5.453162900988902e-06, |
| "loss": 0.017, |
| "num_input_tokens_seen": 326784, |
| "step": 525 |
| }, |
| { |
| "epoch": 10.6, |
| "grad_norm": 0.4177151918411255, |
| "learning_rate": 5.366190985638159e-06, |
| "loss": 0.0051, |
| "num_input_tokens_seen": 329760, |
| "step": 530 |
| }, |
| { |
| "epoch": 10.7, |
| "grad_norm": 0.7757052183151245, |
| "learning_rate": 5.27910752496582e-06, |
| "loss": 0.0013, |
| "num_input_tokens_seen": 333120, |
| "step": 535 |
| }, |
| { |
| "epoch": 10.8, |
| "grad_norm": 0.22021430730819702, |
| "learning_rate": 5.1919390454376e-06, |
| "loss": 0.0004, |
| "num_input_tokens_seen": 336736, |
| "step": 540 |
| }, |
| { |
| "epoch": 10.9, |
| "grad_norm": 2.485224723815918, |
| "learning_rate": 5.1047120994167855e-06, |
| "loss": 0.071, |
| "num_input_tokens_seen": 340192, |
| "step": 545 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 0.23819559812545776, |
| "learning_rate": 5.0174532570761194e-06, |
| "loss": 0.0105, |
| "num_input_tokens_seen": 343264, |
| "step": 550 |
| }, |
| { |
| "epoch": 11.1, |
| "grad_norm": 9.116790771484375, |
| "learning_rate": 4.9301890983042744e-06, |
| "loss": 0.0104, |
| "num_input_tokens_seen": 345920, |
| "step": 555 |
| }, |
| { |
| "epoch": 11.2, |
| "grad_norm": 0.05121885612607002, |
| "learning_rate": 4.842946204609359e-06, |
| "loss": 0.0013, |
| "num_input_tokens_seen": 348864, |
| "step": 560 |
| }, |
| { |
| "epoch": 11.3, |
| "grad_norm": 0.3958645462989807, |
| "learning_rate": 4.755751151021934e-06, |
| "loss": 0.001, |
| "num_input_tokens_seen": 352448, |
| "step": 565 |
| }, |
| { |
| "epoch": 11.4, |
| "grad_norm": 1.371279239654541, |
| "learning_rate": 4.668630498000001e-06, |
| "loss": 0.001, |
| "num_input_tokens_seen": 355200, |
| "step": 570 |
| }, |
| { |
| "epoch": 11.5, |
| "grad_norm": 0.7824882864952087, |
| "learning_rate": 4.581610783338424e-06, |
| "loss": 0.032, |
| "num_input_tokens_seen": 357760, |
| "step": 575 |
| }, |
| { |
| "epoch": 11.6, |
| "grad_norm": 0.03793586045503616, |
| "learning_rate": 4.494718514085269e-06, |
| "loss": 0.0017, |
| "num_input_tokens_seen": 361088, |
| "step": 580 |
| }, |
| { |
| "epoch": 11.7, |
| "grad_norm": 0.3265180289745331, |
| "learning_rate": 4.4079801584674955e-06, |
| "loss": 0.0013, |
| "num_input_tokens_seen": 364384, |
| "step": 585 |
| }, |
| { |
| "epoch": 11.8, |
| "grad_norm": 0.03340812399983406, |
| "learning_rate": 4.321422137828479e-06, |
| "loss": 0.0039, |
| "num_input_tokens_seen": 367424, |
| "step": 590 |
| }, |
| { |
| "epoch": 11.9, |
| "grad_norm": 0.24023908376693726, |
| "learning_rate": 4.23507081857981e-06, |
| "loss": 0.0007, |
| "num_input_tokens_seen": 370944, |
| "step": 595 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 0.24906252324581146, |
| "learning_rate": 4.148952504169839e-06, |
| "loss": 0.0007, |
| "num_input_tokens_seen": 374240, |
| "step": 600 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_loss": 0.25381362438201904, |
| "eval_runtime": 1.0953, |
| "eval_samples_per_second": 45.651, |
| "eval_steps_per_second": 11.869, |
| "num_input_tokens_seen": 374240, |
| "step": 600 |
| }, |
| { |
| "epoch": 12.1, |
| "grad_norm": 0.1331377625465393, |
| "learning_rate": 4.063093427071376e-06, |
| "loss": 0.0006, |
| "num_input_tokens_seen": 377440, |
| "step": 605 |
| }, |
| { |
| "epoch": 12.2, |
| "grad_norm": 0.04510519653558731, |
| "learning_rate": 3.977519740791049e-06, |
| "loss": 0.0005, |
| "num_input_tokens_seen": 380672, |
| "step": 610 |
| }, |
| { |
| "epoch": 12.3, |
| "grad_norm": 0.06641122698783875, |
| "learning_rate": 3.892257511902664e-06, |
| "loss": 0.0005, |
| "num_input_tokens_seen": 383520, |
| "step": 615 |
| }, |
| { |
| "epoch": 12.4, |
| "grad_norm": 0.873627245426178, |
| "learning_rate": 3.8073327121070968e-06, |
| "loss": 0.0041, |
| "num_input_tokens_seen": 386144, |
| "step": 620 |
| }, |
| { |
| "epoch": 12.5, |
| "grad_norm": 0.016877591609954834, |
| "learning_rate": 3.7227712103210485e-06, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 390080, |
| "step": 625 |
| }, |
| { |
| "epoch": 12.6, |
| "grad_norm": 0.009598777629435062, |
| "learning_rate": 3.6385987647971287e-06, |
| "loss": 0.0008, |
| "num_input_tokens_seen": 393568, |
| "step": 630 |
| }, |
| { |
| "epoch": 12.7, |
| "grad_norm": 0.02149510197341442, |
| "learning_rate": 3.5548410152776414e-06, |
| "loss": 0.0004, |
| "num_input_tokens_seen": 396576, |
| "step": 635 |
| }, |
| { |
| "epoch": 12.8, |
| "grad_norm": 0.014570381492376328, |
| "learning_rate": 3.471523475184472e-06, |
| "loss": 0.0026, |
| "num_input_tokens_seen": 399296, |
| "step": 640 |
| }, |
| { |
| "epoch": 12.9, |
| "grad_norm": 0.05763368308544159, |
| "learning_rate": 3.3886715238474454e-06, |
| "loss": 0.0009, |
| "num_input_tokens_seen": 402240, |
| "step": 645 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 0.7486214637756348, |
| "learning_rate": 3.3063103987735433e-06, |
| "loss": 0.0008, |
| "num_input_tokens_seen": 405248, |
| "step": 650 |
| }, |
| { |
| "epoch": 13.1, |
| "grad_norm": 0.15113848447799683, |
| "learning_rate": 3.224465187959316e-06, |
| "loss": 0.0006, |
| "num_input_tokens_seen": 408416, |
| "step": 655 |
| }, |
| { |
| "epoch": 13.2, |
| "grad_norm": 0.2123100310564041, |
| "learning_rate": 3.1431608222488276e-06, |
| "loss": 0.0005, |
| "num_input_tokens_seen": 411104, |
| "step": 660 |
| }, |
| { |
| "epoch": 13.3, |
| "grad_norm": 0.2387603521347046, |
| "learning_rate": 3.0624220677394854e-06, |
| "loss": 0.0004, |
| "num_input_tokens_seen": 414304, |
| "step": 665 |
| }, |
| { |
| "epoch": 13.4, |
| "grad_norm": 0.1298046112060547, |
| "learning_rate": 2.98227351823805e-06, |
| "loss": 0.0004, |
| "num_input_tokens_seen": 417440, |
| "step": 670 |
| }, |
| { |
| "epoch": 13.5, |
| "grad_norm": 0.16697478294372559, |
| "learning_rate": 2.9027395877691143e-06, |
| "loss": 0.0005, |
| "num_input_tokens_seen": 420672, |
| "step": 675 |
| }, |
| { |
| "epoch": 13.6, |
| "grad_norm": 0.0768580213189125, |
| "learning_rate": 2.8238445031383634e-06, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 423360, |
| "step": 680 |
| }, |
| { |
| "epoch": 13.7, |
| "grad_norm": 0.04880240559577942, |
| "learning_rate": 2.7456122965528475e-06, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 426528, |
| "step": 685 |
| }, |
| { |
| "epoch": 13.8, |
| "grad_norm": 0.15164293348789215, |
| "learning_rate": 2.6680667983005446e-06, |
| "loss": 0.0004, |
| "num_input_tokens_seen": 429728, |
| "step": 690 |
| }, |
| { |
| "epoch": 13.9, |
| "grad_norm": 0.05918792262673378, |
| "learning_rate": 2.5912316294914232e-06, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 432512, |
| "step": 695 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 0.01722647435963154, |
| "learning_rate": 2.5151301948622235e-06, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 436160, |
| "step": 700 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_loss": 0.23953387141227722, |
| "eval_runtime": 1.0903, |
| "eval_samples_per_second": 45.86, |
| "eval_steps_per_second": 11.924, |
| "num_input_tokens_seen": 436160, |
| "step": 700 |
| }, |
| { |
| "epoch": 14.1, |
| "grad_norm": 0.0866999700665474, |
| "learning_rate": 2.4397856756471435e-06, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 439168, |
| "step": 705 |
| }, |
| { |
| "epoch": 14.2, |
| "grad_norm": 0.08916126191616058, |
| "learning_rate": 2.3652210225166122e-06, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 442464, |
| "step": 710 |
| }, |
| { |
| "epoch": 14.3, |
| "grad_norm": 0.09090403467416763, |
| "learning_rate": 2.2914589485863015e-06, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 445440, |
| "step": 715 |
| }, |
| { |
| "epoch": 14.4, |
| "grad_norm": 0.15647543966770172, |
| "learning_rate": 2.218521922498476e-06, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 448320, |
| "step": 720 |
| }, |
| { |
| "epoch": 14.5, |
| "grad_norm": 0.014233211055397987, |
| "learning_rate": 2.146432161577842e-06, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 452000, |
| "step": 725 |
| }, |
| { |
| "epoch": 14.6, |
| "grad_norm": 0.0156401414424181, |
| "learning_rate": 2.075211625063923e-06, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 455104, |
| "step": 730 |
| }, |
| { |
| "epoch": 14.7, |
| "grad_norm": 0.038795698434114456, |
| "learning_rate": 2.0048820074220716e-06, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 458176, |
| "step": 735 |
| }, |
| { |
| "epoch": 14.8, |
| "grad_norm": 0.01730630174279213, |
| "learning_rate": 1.9354647317351187e-06, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 461120, |
| "step": 740 |
| }, |
| { |
| "epoch": 14.9, |
| "grad_norm": 0.009268361143767834, |
| "learning_rate": 1.8669809431776991e-06, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 464064, |
| "step": 745 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 0.07223889231681824, |
| "learning_rate": 1.799451502575222e-06, |
| "loss": 0.0004, |
| "num_input_tokens_seen": 467168, |
| "step": 750 |
| }, |
| { |
| "epoch": 15.1, |
| "grad_norm": 0.0142788952216506, |
| "learning_rate": 1.7328969800494727e-06, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 470112, |
| "step": 755 |
| }, |
| { |
| "epoch": 15.2, |
| "grad_norm": 0.05893516167998314, |
| "learning_rate": 1.6673376487527382e-06, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 473568, |
| "step": 760 |
| }, |
| { |
| "epoch": 15.3, |
| "grad_norm": 0.08952557295560837, |
| "learning_rate": 1.6027934786924187e-06, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 476640, |
| "step": 765 |
| }, |
| { |
| "epoch": 15.4, |
| "grad_norm": 0.03307904675602913, |
| "learning_rate": 1.5392841306479667e-06, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 480032, |
| "step": 770 |
| }, |
| { |
| "epoch": 15.5, |
| "grad_norm": 0.07773683220148087, |
| "learning_rate": 1.4768289501820265e-06, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 483072, |
| "step": 775 |
| }, |
| { |
| "epoch": 15.6, |
| "grad_norm": 0.02797629125416279, |
| "learning_rate": 1.4154469617475864e-06, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 485856, |
| "step": 780 |
| }, |
| { |
| "epoch": 15.7, |
| "grad_norm": 0.0267687626183033, |
| "learning_rate": 1.3551568628929434e-06, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 489280, |
| "step": 785 |
| }, |
| { |
| "epoch": 15.8, |
| "grad_norm": 0.0966469869017601, |
| "learning_rate": 1.2959770185662502e-06, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 492352, |
| "step": 790 |
| }, |
| { |
| "epoch": 15.9, |
| "grad_norm": 0.10477598756551743, |
| "learning_rate": 1.2379254555213788e-06, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 496000, |
| "step": 795 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 0.04333118721842766, |
| "learning_rate": 1.1810198568267906e-06, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 498688, |
| "step": 800 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_loss": 0.2310182899236679, |
| "eval_runtime": 1.0892, |
| "eval_samples_per_second": 45.903, |
| "eval_steps_per_second": 11.935, |
| "num_input_tokens_seen": 498688, |
| "step": 800 |
| }, |
| { |
| "epoch": 16.1, |
| "grad_norm": 0.04000323265790939, |
| "learning_rate": 1.1252775564791023e-06, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 502016, |
| "step": 805 |
| }, |
| { |
| "epoch": 16.2, |
| "grad_norm": 0.017859216779470444, |
| "learning_rate": 1.0707155341229902e-06, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 505408, |
| "step": 810 |
| }, |
| { |
| "epoch": 16.3, |
| "grad_norm": 0.04486355930566788, |
| "learning_rate": 1.0173504098790188e-06, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 508128, |
| "step": 815 |
| }, |
| { |
| "epoch": 16.4, |
| "grad_norm": 0.11315115541219711, |
| "learning_rate": 9.651984392809916e-07, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 511744, |
| "step": 820 |
| }, |
| { |
| "epoch": 16.5, |
| "grad_norm": 0.03962993994355202, |
| "learning_rate": 9.142755083243577e-07, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 514464, |
| "step": 825 |
| }, |
| { |
| "epoch": 16.6, |
| "grad_norm": 0.0589391328394413, |
| "learning_rate": 8.645971286271903e-07, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 517376, |
| "step": 830 |
| }, |
| { |
| "epoch": 16.7, |
| "grad_norm": 0.0133219538256526, |
| "learning_rate": 8.161784327051919e-07, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 520288, |
| "step": 835 |
| }, |
| { |
| "epoch": 16.8, |
| "grad_norm": 0.0061556631699204445, |
| "learning_rate": 7.690341693621805e-07, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 523616, |
| "step": 840 |
| }, |
| { |
| "epoch": 16.9, |
| "grad_norm": 0.02465108223259449, |
| "learning_rate": 7.23178699197467e-07, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 526752, |
| "step": 845 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 0.012180883437395096, |
| "learning_rate": 6.786259902314768e-07, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 529472, |
| "step": 850 |
| }, |
| { |
| "epoch": 17.1, |
| "grad_norm": 0.03822903335094452, |
| "learning_rate": 6.353896136509524e-07, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 532480, |
| "step": 855 |
| }, |
| { |
| "epoch": 17.2, |
| "grad_norm": 0.04699721932411194, |
| "learning_rate": 5.934827396750392e-07, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 536000, |
| "step": 860 |
| }, |
| { |
| "epoch": 17.3, |
| "grad_norm": 0.04608152434229851, |
| "learning_rate": 5.529181335435124e-07, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 539328, |
| "step": 865 |
| }, |
| { |
| "epoch": 17.4, |
| "grad_norm": 0.03996019810438156, |
| "learning_rate": 5.137081516283582e-07, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 541984, |
| "step": 870 |
| }, |
| { |
| "epoch": 17.5, |
| "grad_norm": 0.155074805021286, |
| "learning_rate": 4.758647376699033e-07, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 545344, |
| "step": 875 |
| }, |
| { |
| "epoch": 17.6, |
| "grad_norm": 0.015391808934509754, |
| "learning_rate": 4.3939941913863525e-07, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 548416, |
| "step": 880 |
| }, |
| { |
| "epoch": 17.7, |
| "grad_norm": 0.0511036142706871, |
| "learning_rate": 4.043233037238281e-07, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 550912, |
| "step": 885 |
| }, |
| { |
| "epoch": 17.8, |
| "grad_norm": 0.05168919637799263, |
| "learning_rate": 3.7064707595002636e-07, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 553952, |
| "step": 890 |
| }, |
| { |
| "epoch": 17.9, |
| "grad_norm": 0.05797732248902321, |
| "learning_rate": 3.3838099392243915e-07, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 557216, |
| "step": 895 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 0.010777189396321774, |
| "learning_rate": 3.0753488620222037e-07, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 560544, |
| "step": 900 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_loss": 0.23601579666137695, |
| "eval_runtime": 1.087, |
| "eval_samples_per_second": 46.0, |
| "eval_steps_per_second": 11.96, |
| "num_input_tokens_seen": 560544, |
| "step": 900 |
| }, |
| { |
| "epoch": 18.1, |
| "grad_norm": 0.007938344962894917, |
| "learning_rate": 2.7811814881259503e-07, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 563904, |
| "step": 905 |
| }, |
| { |
| "epoch": 18.2, |
| "grad_norm": 0.007656295783817768, |
| "learning_rate": 2.5013974237673824e-07, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 566976, |
| "step": 910 |
| }, |
| { |
| "epoch": 18.3, |
| "grad_norm": 0.03496653586626053, |
| "learning_rate": 2.2360818938828189e-07, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 570112, |
| "step": 915 |
| }, |
| { |
| "epoch": 18.4, |
| "grad_norm": 0.09593867510557175, |
| "learning_rate": 1.9853157161528468e-07, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 573536, |
| "step": 920 |
| }, |
| { |
| "epoch": 18.5, |
| "grad_norm": 0.052002549171447754, |
| "learning_rate": 1.7491752763844294e-07, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 576992, |
| "step": 925 |
| }, |
| { |
| "epoch": 18.6, |
| "grad_norm": 0.01646798849105835, |
| "learning_rate": 1.5277325052430569e-07, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 579872, |
| "step": 930 |
| }, |
| { |
| "epoch": 18.7, |
| "grad_norm": 0.007558745332062244, |
| "learning_rate": 1.3210548563419857e-07, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 583072, |
| "step": 935 |
| }, |
| { |
| "epoch": 18.8, |
| "grad_norm": 0.02522999420762062, |
| "learning_rate": 1.1292052856952063e-07, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 586080, |
| "step": 940 |
| }, |
| { |
| "epoch": 18.9, |
| "grad_norm": 0.01886642910540104, |
| "learning_rate": 9.522422325404234e-08, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 588768, |
| "step": 945 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 0.030690938234329224, |
| "learning_rate": 7.90219601537906e-08, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 591104, |
| "step": 950 |
| }, |
| { |
| "epoch": 19.1, |
| "grad_norm": 0.07759882509708405, |
| "learning_rate": 6.431867463506047e-08, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 594528, |
| "step": 955 |
| }, |
| { |
| "epoch": 19.2, |
| "grad_norm": 0.00944035779684782, |
| "learning_rate": 5.111884546105506e-08, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 597760, |
| "step": 960 |
| }, |
| { |
| "epoch": 19.3, |
| "grad_norm": 0.09614001959562302, |
| "learning_rate": 3.9426493427611177e-08, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 600704, |
| "step": 965 |
| }, |
| { |
| "epoch": 19.4, |
| "grad_norm": 0.04148221015930176, |
| "learning_rate": 2.9245180138423033e-08, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 603680, |
| "step": 970 |
| }, |
| { |
| "epoch": 19.5, |
| "grad_norm": 0.05608119070529938, |
| "learning_rate": 2.057800692014833e-08, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 606624, |
| "step": 975 |
| }, |
| { |
| "epoch": 19.6, |
| "grad_norm": 0.007554756943136454, |
| "learning_rate": 1.3427613877709523e-08, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 609408, |
| "step": 980 |
| }, |
| { |
| "epoch": 19.7, |
| "grad_norm": 0.025098580867052078, |
| "learning_rate": 7.796179090094891e-09, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 613312, |
| "step": 985 |
| }, |
| { |
| "epoch": 19.8, |
| "grad_norm": 0.03200896456837654, |
| "learning_rate": 3.685417946894254e-09, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 616224, |
| "step": 990 |
| }, |
| { |
| "epoch": 19.9, |
| "grad_norm": 0.027597220614552498, |
| "learning_rate": 1.096582625772502e-09, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 619456, |
| "step": 995 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.08454413712024689, |
| "learning_rate": 3.0461711048035415e-11, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 622560, |
| "step": 1000 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_loss": 0.23559831082820892, |
| "eval_runtime": 1.0917, |
| "eval_samples_per_second": 45.802, |
| "eval_steps_per_second": 11.909, |
| "num_input_tokens_seen": 622560, |
| "step": 1000 |
| }, |
| { |
| "epoch": 20.0, |
| "num_input_tokens_seen": 622560, |
| "step": 1000, |
| "total_flos": 2.803359542280192e+16, |
| "train_loss": 0.37913614323007644, |
| "train_runtime": 173.4288, |
| "train_samples_per_second": 23.064, |
| "train_steps_per_second": 5.766 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 1000, |
| "num_input_tokens_seen": 622560, |
| "num_train_epochs": 20, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.803359542280192e+16, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|