| { | |
| "best_metric": 0.5424528301886793, | |
| "best_model_checkpoint": "./Validated_Balanced_Raw_Data_model_boost8_outputs/checkpoint-640", | |
| "epoch": 25.0, | |
| "eval_steps": 500, | |
| "global_step": 2000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.625, | |
| "grad_norm": 1.6869874000549316, | |
| "learning_rate": 1.5e-05, | |
| "loss": 1.326, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.3867924528301887, | |
| "eval_loss": 1.2989507913589478, | |
| "eval_runtime": 1.3738, | |
| "eval_samples_per_second": 154.315, | |
| "eval_steps_per_second": 10.191, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 4.385875701904297, | |
| "learning_rate": 2.97e-05, | |
| "loss": 1.2994, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.875, | |
| "grad_norm": 12.015926361083984, | |
| "learning_rate": 2.9950795096316707e-05, | |
| "loss": 1.2698, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.37264150943396224, | |
| "eval_loss": 1.2874739170074463, | |
| "eval_runtime": 1.3783, | |
| "eval_samples_per_second": 153.812, | |
| "eval_steps_per_second": 10.157, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 4.884477615356445, | |
| "learning_rate": 2.9803503201606352e-05, | |
| "loss": 1.2271, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.42452830188679247, | |
| "eval_loss": 1.2136298418045044, | |
| "eval_runtime": 1.3761, | |
| "eval_samples_per_second": 154.055, | |
| "eval_steps_per_second": 10.173, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 3.125, | |
| "grad_norm": 13.981613159179688, | |
| "learning_rate": 2.955909064700128e-05, | |
| "loss": 1.2369, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "grad_norm": 7.470583438873291, | |
| "learning_rate": 2.921124361809201e-05, | |
| "loss": 1.1742, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.4716981132075472, | |
| "eval_loss": 1.1844068765640259, | |
| "eval_runtime": 1.3851, | |
| "eval_samples_per_second": 153.063, | |
| "eval_steps_per_second": 10.108, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 4.375, | |
| "grad_norm": 3.380803346633911, | |
| "learning_rate": 2.8766319385259717e-05, | |
| "loss": 1.1633, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 23.32581901550293, | |
| "learning_rate": 2.822735723216188e-05, | |
| "loss": 1.1507, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.49056603773584906, | |
| "eval_loss": 1.1472444534301758, | |
| "eval_runtime": 1.3725, | |
| "eval_samples_per_second": 154.464, | |
| "eval_steps_per_second": 10.2, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 5.625, | |
| "grad_norm": 5.383129596710205, | |
| "learning_rate": 2.7598038816804598e-05, | |
| "loss": 1.1228, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.46226415094339623, | |
| "eval_loss": 1.156751275062561, | |
| "eval_runtime": 1.3826, | |
| "eval_samples_per_second": 153.335, | |
| "eval_steps_per_second": 10.126, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "grad_norm": 5.6944193840026855, | |
| "learning_rate": 2.6882663022085234e-05, | |
| "loss": 1.0677, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 6.875, | |
| "grad_norm": 5.666170597076416, | |
| "learning_rate": 2.608611659006323e-05, | |
| "loss": 1.0484, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.4811320754716981, | |
| "eval_loss": 1.1222484111785889, | |
| "eval_runtime": 1.3869, | |
| "eval_samples_per_second": 152.855, | |
| "eval_steps_per_second": 10.094, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "grad_norm": 6.5580291748046875, | |
| "learning_rate": 2.5213840740556754e-05, | |
| "loss": 1.0224, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.5424528301886793, | |
| "eval_loss": 1.1053968667984009, | |
| "eval_runtime": 1.3797, | |
| "eval_samples_per_second": 153.654, | |
| "eval_steps_per_second": 10.147, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 8.125, | |
| "grad_norm": 6.871710300445557, | |
| "learning_rate": 2.4271794002094025e-05, | |
| "loss": 0.9804, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "grad_norm": 11.924201011657715, | |
| "learning_rate": 2.3287096096947202e-05, | |
| "loss": 0.9876, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.5, | |
| "eval_loss": 1.1332839727401733, | |
| "eval_runtime": 1.3822, | |
| "eval_samples_per_second": 153.382, | |
| "eval_steps_per_second": 10.129, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 9.375, | |
| "grad_norm": 10.64323616027832, | |
| "learning_rate": 2.222630511152573e-05, | |
| "loss": 0.897, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 25.509368896484375, | |
| "learning_rate": 2.1116151134815555e-05, | |
| "loss": 0.9897, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.4811320754716981, | |
| "eval_loss": 1.1367976665496826, | |
| "eval_runtime": 1.3805, | |
| "eval_samples_per_second": 153.567, | |
| "eval_steps_per_second": 10.141, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 10.625, | |
| "grad_norm": 19.3117733001709, | |
| "learning_rate": 1.9964217644158925e-05, | |
| "loss": 0.9133, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.5, | |
| "eval_loss": 1.0922900438308716, | |
| "eval_runtime": 1.382, | |
| "eval_samples_per_second": 153.404, | |
| "eval_steps_per_second": 10.13, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 11.25, | |
| "grad_norm": 15.118478775024414, | |
| "learning_rate": 1.8778373513342223e-05, | |
| "loss": 0.9207, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 11.875, | |
| "grad_norm": 20.54922103881836, | |
| "learning_rate": 1.7591151985494456e-05, | |
| "loss": 0.8814, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.4716981132075472, | |
| "eval_loss": 1.1101481914520264, | |
| "eval_runtime": 1.3773, | |
| "eval_samples_per_second": 153.926, | |
| "eval_steps_per_second": 10.165, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "grad_norm": 20.49024200439453, | |
| "learning_rate": 1.6362233121333124e-05, | |
| "loss": 0.8185, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.49528301886792453, | |
| "eval_loss": 1.1416065692901611, | |
| "eval_runtime": 1.3819, | |
| "eval_samples_per_second": 153.417, | |
| "eval_steps_per_second": 10.131, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 13.125, | |
| "grad_norm": 19.4349365234375, | |
| "learning_rate": 1.5124008823666874e-05, | |
| "loss": 0.8224, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 13.75, | |
| "grad_norm": 23.32369041442871, | |
| "learning_rate": 1.3884937419991688e-05, | |
| "loss": 0.7917, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.5047169811320755, | |
| "eval_loss": 1.1236770153045654, | |
| "eval_runtime": 1.3744, | |
| "eval_samples_per_second": 154.25, | |
| "eval_steps_per_second": 10.186, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 14.375, | |
| "grad_norm": 9.466968536376953, | |
| "learning_rate": 1.2653483024396535e-05, | |
| "loss": 0.7934, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 24.469558715820312, | |
| "learning_rate": 1.1438057719081672e-05, | |
| "loss": 0.7773, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.5047169811320755, | |
| "eval_loss": 1.099358081817627, | |
| "eval_runtime": 1.3935, | |
| "eval_samples_per_second": 152.138, | |
| "eval_steps_per_second": 10.047, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 15.625, | |
| "grad_norm": 10.011290550231934, | |
| "learning_rate": 1.0246964091307435e-05, | |
| "loss": 0.7289, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.5094339622641509, | |
| "eval_loss": 1.1058666706085205, | |
| "eval_runtime": 1.3811, | |
| "eval_samples_per_second": 153.497, | |
| "eval_steps_per_second": 10.137, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 16.25, | |
| "grad_norm": 22.63361358642578, | |
| "learning_rate": 9.08833851830458e-06, | |
| "loss": 0.7104, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 16.875, | |
| "grad_norm": 4.78521203994751, | |
| "learning_rate": 7.97009558756758e-06, | |
| "loss": 0.7337, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.5141509433962265, | |
| "eval_loss": 1.1084901094436646, | |
| "eval_runtime": 1.3844, | |
| "eval_samples_per_second": 153.134, | |
| "eval_steps_per_second": 10.113, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 17.5, | |
| "grad_norm": 13.2825288772583, | |
| "learning_rate": 6.899874032196796e-06, | |
| "loss": 0.7052, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.5188679245283019, | |
| "eval_loss": 1.1131378412246704, | |
| "eval_runtime": 1.3839, | |
| "eval_samples_per_second": 153.192, | |
| "eval_steps_per_second": 10.116, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 18.125, | |
| "grad_norm": 10.307984352111816, | |
| "learning_rate": 5.884984550605782e-06, | |
| "loss": 0.7075, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 18.75, | |
| "grad_norm": 12.91500186920166, | |
| "learning_rate": 4.93235986703821e-06, | |
| "loss": 0.6703, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.5330188679245284, | |
| "eval_loss": 1.1068452596664429, | |
| "eval_runtime": 1.3783, | |
| "eval_samples_per_second": 153.808, | |
| "eval_steps_per_second": 10.157, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 19.375, | |
| "grad_norm": 22.758438110351562, | |
| "learning_rate": 4.048507374031557e-06, | |
| "loss": 0.6943, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 19.057764053344727, | |
| "learning_rate": 3.2394646803277063e-06, | |
| "loss": 0.6482, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.5188679245283019, | |
| "eval_loss": 1.1251068115234375, | |
| "eval_runtime": 1.3783, | |
| "eval_samples_per_second": 153.811, | |
| "eval_steps_per_second": 10.157, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 20.625, | |
| "grad_norm": 24.637718200683594, | |
| "learning_rate": 2.5107583678831445e-06, | |
| "loss": 0.6421, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.5283018867924528, | |
| "eval_loss": 1.11643385887146, | |
| "eval_runtime": 1.4272, | |
| "eval_samples_per_second": 148.544, | |
| "eval_steps_per_second": 9.81, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 21.25, | |
| "grad_norm": 18.683311462402344, | |
| "learning_rate": 1.867366239710358e-06, | |
| "loss": 0.6103, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 21.875, | |
| "grad_norm": 13.443337440490723, | |
| "learning_rate": 1.313683316435793e-06, | |
| "loss": 0.6738, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.5377358490566038, | |
| "eval_loss": 1.1147156953811646, | |
| "eval_runtime": 1.3829, | |
| "eval_samples_per_second": 153.297, | |
| "eval_steps_per_second": 10.123, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 22.5, | |
| "grad_norm": 24.684009552001953, | |
| "learning_rate": 8.534918138525211e-07, | |
| "loss": 0.6459, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.5283018867924528, | |
| "eval_loss": 1.1151989698410034, | |
| "eval_runtime": 1.381, | |
| "eval_samples_per_second": 153.516, | |
| "eval_steps_per_second": 10.138, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 23.125, | |
| "grad_norm": 7.173917293548584, | |
| "learning_rate": 4.899353065512263e-07, | |
| "loss": 0.6673, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 23.75, | |
| "grad_norm": 9.602700233459473, | |
| "learning_rate": 2.2549725411822485e-07, | |
| "loss": 0.6302, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.5283018867924528, | |
| "eval_loss": 1.1155682802200317, | |
| "eval_runtime": 1.3752, | |
| "eval_samples_per_second": 154.158, | |
| "eval_steps_per_second": 10.18, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 24.375, | |
| "grad_norm": 34.81510925292969, | |
| "learning_rate": 6.198403658829233e-08, | |
| "loss": 0.6049, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 22.63814926147461, | |
| "learning_rate": 5.126150373813144e-10, | |
| "loss": 0.689, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.5283018867924528, | |
| "eval_loss": 1.1156727075576782, | |
| "eval_runtime": 1.3867, | |
| "eval_samples_per_second": 152.879, | |
| "eval_steps_per_second": 10.096, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "step": 2000, | |
| "total_flos": 2.1204873264365568e+18, | |
| "train_loss": 0.8860943021774292, | |
| "train_runtime": 622.5434, | |
| "train_samples_per_second": 25.54, | |
| "train_steps_per_second": 3.213 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 2000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 25, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.1204873264365568e+18, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |