| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 342, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011695906432748537, |
| "grad_norm": 0.333984375, |
| "learning_rate": 0.0003, |
| "loss": 0.3604, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.023391812865497075, |
| "grad_norm": 0.359375, |
| "learning_rate": 0.00029989634325549745, |
| "loss": 0.3609, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.03508771929824561, |
| "grad_norm": 0.365234375, |
| "learning_rate": 0.00029958551628493234, |
| "loss": 0.359, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.04678362573099415, |
| "grad_norm": 0.40625, |
| "learning_rate": 0.00029906794867912953, |
| "loss": 0.3622, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.05847953216374269, |
| "grad_norm": 0.369140625, |
| "learning_rate": 0.0002983443557630634, |
| "loss": 0.3654, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.07017543859649122, |
| "grad_norm": 0.34765625, |
| "learning_rate": 0.0002974157376072144, |
| "loss": 0.4022, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.08187134502923976, |
| "grad_norm": 0.365234375, |
| "learning_rate": 0.0002962833776453813, |
| "loss": 0.3845, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0935672514619883, |
| "grad_norm": 0.38671875, |
| "learning_rate": 0.00029494884090086083, |
| "loss": 0.4164, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.10526315789473684, |
| "grad_norm": 0.373046875, |
| "learning_rate": 0.00029341397182344444, |
| "loss": 0.3409, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.11695906432748537, |
| "grad_norm": 0.376953125, |
| "learning_rate": 0.0002916808917402228, |
| "loss": 0.3849, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1286549707602339, |
| "grad_norm": 0.3984375, |
| "learning_rate": 0.0002897519959237211, |
| "loss": 0.3758, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.14035087719298245, |
| "grad_norm": 0.380859375, |
| "learning_rate": 0.00028762995028141694, |
| "loss": 0.4021, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.15204678362573099, |
| "grad_norm": 0.388671875, |
| "learning_rate": 0.00028531768767121657, |
| "loss": 0.4045, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.16374269005847952, |
| "grad_norm": 0.37109375, |
| "learning_rate": 0.0002828184038479814, |
| "loss": 0.3909, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.17543859649122806, |
| "grad_norm": 0.365234375, |
| "learning_rate": 0.00028013555304670765, |
| "loss": 0.3994, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.1871345029239766, |
| "grad_norm": 0.3828125, |
| "learning_rate": 0.00027727284320846243, |
| "loss": 0.351, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.19883040935672514, |
| "grad_norm": 0.400390625, |
| "learning_rate": 0.0002742342308556763, |
| "loss": 0.3811, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.21052631578947367, |
| "grad_norm": 0.3828125, |
| "learning_rate": 0.00027102391562387317, |
| "loss": 0.4118, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 0.353515625, |
| "learning_rate": 0.0002676463344573965, |
| "loss": 0.4378, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.23391812865497075, |
| "grad_norm": 0.37109375, |
| "learning_rate": 0.00026410615547715297, |
| "loss": 0.4012, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.24561403508771928, |
| "grad_norm": 0.390625, |
| "learning_rate": 0.0002604082715288501, |
| "loss": 0.3868, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.2573099415204678, |
| "grad_norm": 0.359375, |
| "learning_rate": 0.00025655779342064275, |
| "loss": 0.3576, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.26900584795321636, |
| "grad_norm": 0.37890625, |
| "learning_rate": 0.00025256004285953735, |
| "loss": 0.3907, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.2807017543859649, |
| "grad_norm": 0.37109375, |
| "learning_rate": 0.0002484205450963138, |
| "loss": 0.4188, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.29239766081871343, |
| "grad_norm": 0.373046875, |
| "learning_rate": 0.00024414502128913227, |
| "loss": 0.3973, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.30409356725146197, |
| "grad_norm": 0.375, |
| "learning_rate": 0.0002397393805963781, |
| "loss": 0.3653, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.3157894736842105, |
| "grad_norm": 0.3984375, |
| "learning_rate": 0.00023520971200967334, |
| "loss": 0.426, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.32748538011695905, |
| "grad_norm": 0.33203125, |
| "learning_rate": 0.00023056227593834302, |
| "loss": 0.3664, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.3391812865497076, |
| "grad_norm": 0.34765625, |
| "learning_rate": 0.0002258034955569662, |
| "loss": 0.4021, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.3508771929824561, |
| "grad_norm": 0.376953125, |
| "learning_rate": 0.00022093994792797152, |
| "loss": 0.3933, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.36257309941520466, |
| "grad_norm": 0.390625, |
| "learning_rate": 0.00021597835491154492, |
| "loss": 0.3885, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.3742690058479532, |
| "grad_norm": 0.361328125, |
| "learning_rate": 0.00021092557387541476, |
| "loss": 0.4028, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.38596491228070173, |
| "grad_norm": 0.388671875, |
| "learning_rate": 0.00020578858821735302, |
| "loss": 0.3869, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.39766081871345027, |
| "grad_norm": 0.38671875, |
| "learning_rate": 0.0002005744977134912, |
| "loss": 0.3927, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.4093567251461988, |
| "grad_norm": 0.390625, |
| "learning_rate": 0.0001952905087057917, |
| "loss": 0.4099, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.42105263157894735, |
| "grad_norm": 0.36328125, |
| "learning_rate": 0.00018994392414223475, |
| "loss": 0.352, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.4327485380116959, |
| "grad_norm": 0.373046875, |
| "learning_rate": 0.00018454213348348796, |
| "loss": 0.417, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 0.375, |
| "learning_rate": 0.0001790926024900069, |
| "loss": 0.3728, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.45614035087719296, |
| "grad_norm": 0.40234375, |
| "learning_rate": 0.0001736028629036829, |
| "loss": 0.4106, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.4678362573099415, |
| "grad_norm": 0.365234375, |
| "learning_rate": 0.00016808050203829842, |
| "loss": 0.45, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.47953216374269003, |
| "grad_norm": 0.376953125, |
| "learning_rate": 0.0001625331522931772, |
| "loss": 0.3749, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.49122807017543857, |
| "grad_norm": 0.380859375, |
| "learning_rate": 0.0001569684806045217, |
| "loss": 0.402, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.5029239766081871, |
| "grad_norm": 0.369140625, |
| "learning_rate": 0.00015139417784901834, |
| "loss": 0.3631, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.5146198830409356, |
| "grad_norm": 0.36328125, |
| "learning_rate": 0.00014581794821435376, |
| "loss": 0.3859, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.5263157894736842, |
| "grad_norm": 0.3828125, |
| "learning_rate": 0.0001402474985513351, |
| "loss": 0.3781, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5380116959064327, |
| "grad_norm": 0.345703125, |
| "learning_rate": 0.00013469052772232873, |
| "loss": 0.3859, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.5497076023391813, |
| "grad_norm": 0.353515625, |
| "learning_rate": 0.0001291547159607405, |
| "loss": 0.3901, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.5614035087719298, |
| "grad_norm": 0.359375, |
| "learning_rate": 0.0001236477142562421, |
| "loss": 0.3818, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.5730994152046783, |
| "grad_norm": 0.357421875, |
| "learning_rate": 0.00011817713378041565, |
| "loss": 0.376, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.5847953216374269, |
| "grad_norm": 0.365234375, |
| "learning_rate": 0.00011275053536743006, |
| "loss": 0.3491, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5964912280701754, |
| "grad_norm": 0.3828125, |
| "learning_rate": 0.0001073754190642881, |
| "loss": 0.4016, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.6081871345029239, |
| "grad_norm": 0.337890625, |
| "learning_rate": 0.0001020592137650872, |
| "loss": 0.3907, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.6198830409356725, |
| "grad_norm": 0.3515625, |
| "learning_rate": 9.680926694361964e-05, |
| "loss": 0.3731, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.631578947368421, |
| "grad_norm": 0.380859375, |
| "learning_rate": 9.163283449850317e-05, |
| "loss": 0.3877, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.6432748538011696, |
| "grad_norm": 0.35546875, |
| "learning_rate": 8.653707072487629e-05, |
| "loss": 0.3481, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6549707602339181, |
| "grad_norm": 0.32421875, |
| "learning_rate": 8.152901842651953e-05, |
| "loss": 0.3408, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.326171875, |
| "learning_rate": 7.661559918206663e-05, |
| "loss": 0.3984, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.6783625730994152, |
| "grad_norm": 0.337890625, |
| "learning_rate": 7.180360377876123e-05, |
| "loss": 0.3573, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.6900584795321637, |
| "grad_norm": 0.3671875, |
| "learning_rate": 6.709968282697749e-05, |
| "loss": 0.3781, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.7017543859649122, |
| "grad_norm": 0.341796875, |
| "learning_rate": 6.251033756847875e-05, |
| "loss": 0.3328, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7134502923976608, |
| "grad_norm": 0.357421875, |
| "learning_rate": 5.804191089111711e-05, |
| "loss": 0.369, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.7251461988304093, |
| "grad_norm": 0.33984375, |
| "learning_rate": 5.3700578562391386e-05, |
| "loss": 0.341, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.7368421052631579, |
| "grad_norm": 0.37109375, |
| "learning_rate": 4.9492340693981646e-05, |
| "loss": 0.4122, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.7485380116959064, |
| "grad_norm": 0.34765625, |
| "learning_rate": 4.542301344905496e-05, |
| "loss": 0.3331, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.7602339181286549, |
| "grad_norm": 0.34765625, |
| "learning_rate": 4.149822100380507e-05, |
| "loss": 0.3633, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.7719298245614035, |
| "grad_norm": 0.359375, |
| "learning_rate": 3.7723387774334816e-05, |
| "loss": 0.32, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.783625730994152, |
| "grad_norm": 0.3671875, |
| "learning_rate": 3.410373091962575e-05, |
| "loss": 0.3594, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.7953216374269005, |
| "grad_norm": 0.345703125, |
| "learning_rate": 3.064425313095474e-05, |
| "loss": 0.3852, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.8070175438596491, |
| "grad_norm": 0.34765625, |
| "learning_rate": 2.734973571772527e-05, |
| "loss": 0.3965, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.8187134502923976, |
| "grad_norm": 0.345703125, |
| "learning_rate": 2.422473199926742e-05, |
| "loss": 0.3503, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8304093567251462, |
| "grad_norm": 0.349609375, |
| "learning_rate": 2.1273561011741404e-05, |
| "loss": 0.367, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.8421052631578947, |
| "grad_norm": 0.345703125, |
| "learning_rate": 1.8500301538841072e-05, |
| "loss": 0.354, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.8538011695906432, |
| "grad_norm": 0.35546875, |
| "learning_rate": 1.5908786474548e-05, |
| "loss": 0.3526, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.8654970760233918, |
| "grad_norm": 0.33984375, |
| "learning_rate": 1.3502597525727504e-05, |
| "loss": 0.3669, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.8771929824561403, |
| "grad_norm": 0.36328125, |
| "learning_rate": 1.1285060261887419e-05, |
| "loss": 0.3918, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 0.330078125, |
| "learning_rate": 9.259239518942219e-06, |
| "loss": 0.3425, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.9005847953216374, |
| "grad_norm": 0.36328125, |
| "learning_rate": 7.427935163333998e-06, |
| "loss": 0.36, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.9122807017543859, |
| "grad_norm": 0.3203125, |
| "learning_rate": 5.793678222365433e-06, |
| "loss": 0.376, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.9239766081871345, |
| "grad_norm": 0.349609375, |
| "learning_rate": 4.358727386092198e-06, |
| "loss": 0.3917, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.935672514619883, |
| "grad_norm": 0.357421875, |
| "learning_rate": 3.125065885610456e-06, |
| "loss": 0.385, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.9473684210526315, |
| "grad_norm": 0.32421875, |
| "learning_rate": 2.0943987520529725e-06, |
| "loss": 0.4026, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.9590643274853801, |
| "grad_norm": 0.33203125, |
| "learning_rate": 1.268150460082823e-06, |
| "loss": 0.3496, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.9707602339181286, |
| "grad_norm": 0.34765625, |
| "learning_rate": 6.47462959141265e-07, |
| "loss": 0.3549, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.9824561403508771, |
| "grad_norm": 0.341796875, |
| "learning_rate": 2.3319409517102984e-07, |
| "loss": 0.3478, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.9941520467836257, |
| "grad_norm": 0.322265625, |
| "learning_rate": 2.5916424995919837e-08, |
| "loss": 0.3478, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 342, |
| "total_flos": 2.2327861511297434e+17, |
| "train_loss": 0.37826008022877206, |
| "train_runtime": 1375.4554, |
| "train_samples_per_second": 7.937, |
| "train_steps_per_second": 0.249 |
| } |
| ], |
| "logging_steps": 4, |
| "max_steps": 342, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.2327861511297434e+17, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|