{ "best_metric": 0.5424528301886793, "best_model_checkpoint": "./Validated_Balanced_Raw_Data_model_boost8_outputs/checkpoint-640", "epoch": 25.0, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.625, "grad_norm": 1.6869874000549316, "learning_rate": 1.5e-05, "loss": 1.326, "step": 50 }, { "epoch": 1.0, "eval_accuracy": 0.3867924528301887, "eval_loss": 1.2989507913589478, "eval_runtime": 1.3738, "eval_samples_per_second": 154.315, "eval_steps_per_second": 10.191, "step": 80 }, { "epoch": 1.25, "grad_norm": 4.385875701904297, "learning_rate": 2.97e-05, "loss": 1.2994, "step": 100 }, { "epoch": 1.875, "grad_norm": 12.015926361083984, "learning_rate": 2.9950795096316707e-05, "loss": 1.2698, "step": 150 }, { "epoch": 2.0, "eval_accuracy": 0.37264150943396224, "eval_loss": 1.2874739170074463, "eval_runtime": 1.3783, "eval_samples_per_second": 153.812, "eval_steps_per_second": 10.157, "step": 160 }, { "epoch": 2.5, "grad_norm": 4.884477615356445, "learning_rate": 2.9803503201606352e-05, "loss": 1.2271, "step": 200 }, { "epoch": 3.0, "eval_accuracy": 0.42452830188679247, "eval_loss": 1.2136298418045044, "eval_runtime": 1.3761, "eval_samples_per_second": 154.055, "eval_steps_per_second": 10.173, "step": 240 }, { "epoch": 3.125, "grad_norm": 13.981613159179688, "learning_rate": 2.955909064700128e-05, "loss": 1.2369, "step": 250 }, { "epoch": 3.75, "grad_norm": 7.470583438873291, "learning_rate": 2.921124361809201e-05, "loss": 1.1742, "step": 300 }, { "epoch": 4.0, "eval_accuracy": 0.4716981132075472, "eval_loss": 1.1844068765640259, "eval_runtime": 1.3851, "eval_samples_per_second": 153.063, "eval_steps_per_second": 10.108, "step": 320 }, { "epoch": 4.375, "grad_norm": 3.380803346633911, "learning_rate": 2.8766319385259717e-05, "loss": 1.1633, "step": 350 }, { "epoch": 5.0, "grad_norm": 23.32581901550293, "learning_rate": 2.822735723216188e-05, "loss": 1.1507, "step": 400 }, { "epoch": 5.0, "eval_accuracy": 0.49056603773584906, "eval_loss": 1.1472444534301758, "eval_runtime": 1.3725, "eval_samples_per_second": 154.464, "eval_steps_per_second": 10.2, "step": 400 }, { "epoch": 5.625, "grad_norm": 5.383129596710205, "learning_rate": 2.7598038816804598e-05, "loss": 1.1228, "step": 450 }, { "epoch": 6.0, "eval_accuracy": 0.46226415094339623, "eval_loss": 1.156751275062561, "eval_runtime": 1.3826, "eval_samples_per_second": 153.335, "eval_steps_per_second": 10.126, "step": 480 }, { "epoch": 6.25, "grad_norm": 5.6944193840026855, "learning_rate": 2.6882663022085234e-05, "loss": 1.0677, "step": 500 }, { "epoch": 6.875, "grad_norm": 5.666170597076416, "learning_rate": 2.608611659006323e-05, "loss": 1.0484, "step": 550 }, { "epoch": 7.0, "eval_accuracy": 0.4811320754716981, "eval_loss": 1.1222484111785889, "eval_runtime": 1.3869, "eval_samples_per_second": 152.855, "eval_steps_per_second": 10.094, "step": 560 }, { "epoch": 7.5, "grad_norm": 6.5580291748046875, "learning_rate": 2.5213840740556754e-05, "loss": 1.0224, "step": 600 }, { "epoch": 8.0, "eval_accuracy": 0.5424528301886793, "eval_loss": 1.1053968667984009, "eval_runtime": 1.3797, "eval_samples_per_second": 153.654, "eval_steps_per_second": 10.147, "step": 640 }, { "epoch": 8.125, "grad_norm": 6.871710300445557, "learning_rate": 2.4271794002094025e-05, "loss": 0.9804, "step": 650 }, { "epoch": 8.75, "grad_norm": 11.924201011657715, "learning_rate": 2.3287096096947202e-05, "loss": 0.9876, "step": 700 }, { "epoch": 9.0, "eval_accuracy": 0.5, "eval_loss": 1.1332839727401733, "eval_runtime": 1.3822, "eval_samples_per_second": 153.382, "eval_steps_per_second": 10.129, "step": 720 }, { "epoch": 9.375, "grad_norm": 10.64323616027832, "learning_rate": 2.222630511152573e-05, "loss": 0.897, "step": 750 }, { "epoch": 10.0, "grad_norm": 25.509368896484375, "learning_rate": 2.1116151134815555e-05, "loss": 0.9897, "step": 800 }, { "epoch": 10.0, "eval_accuracy": 0.4811320754716981, "eval_loss": 1.1367976665496826, "eval_runtime": 1.3805, "eval_samples_per_second": 153.567, "eval_steps_per_second": 10.141, "step": 800 }, { "epoch": 10.625, "grad_norm": 19.3117733001709, "learning_rate": 1.9964217644158925e-05, "loss": 0.9133, "step": 850 }, { "epoch": 11.0, "eval_accuracy": 0.5, "eval_loss": 1.0922900438308716, "eval_runtime": 1.382, "eval_samples_per_second": 153.404, "eval_steps_per_second": 10.13, "step": 880 }, { "epoch": 11.25, "grad_norm": 15.118478775024414, "learning_rate": 1.8778373513342223e-05, "loss": 0.9207, "step": 900 }, { "epoch": 11.875, "grad_norm": 20.54922103881836, "learning_rate": 1.7591151985494456e-05, "loss": 0.8814, "step": 950 }, { "epoch": 12.0, "eval_accuracy": 0.4716981132075472, "eval_loss": 1.1101481914520264, "eval_runtime": 1.3773, "eval_samples_per_second": 153.926, "eval_steps_per_second": 10.165, "step": 960 }, { "epoch": 12.5, "grad_norm": 20.49024200439453, "learning_rate": 1.6362233121333124e-05, "loss": 0.8185, "step": 1000 }, { "epoch": 13.0, "eval_accuracy": 0.49528301886792453, "eval_loss": 1.1416065692901611, "eval_runtime": 1.3819, "eval_samples_per_second": 153.417, "eval_steps_per_second": 10.131, "step": 1040 }, { "epoch": 13.125, "grad_norm": 19.4349365234375, "learning_rate": 1.5124008823666874e-05, "loss": 0.8224, "step": 1050 }, { "epoch": 13.75, "grad_norm": 23.32369041442871, "learning_rate": 1.3884937419991688e-05, "loss": 0.7917, "step": 1100 }, { "epoch": 14.0, "eval_accuracy": 0.5047169811320755, "eval_loss": 1.1236770153045654, "eval_runtime": 1.3744, "eval_samples_per_second": 154.25, "eval_steps_per_second": 10.186, "step": 1120 }, { "epoch": 14.375, "grad_norm": 9.466968536376953, "learning_rate": 1.2653483024396535e-05, "loss": 0.7934, "step": 1150 }, { "epoch": 15.0, "grad_norm": 24.469558715820312, "learning_rate": 1.1438057719081672e-05, "loss": 0.7773, "step": 1200 }, { "epoch": 15.0, "eval_accuracy": 0.5047169811320755, "eval_loss": 1.099358081817627, "eval_runtime": 1.3935, "eval_samples_per_second": 152.138, "eval_steps_per_second": 10.047, "step": 1200 }, { "epoch": 15.625, "grad_norm": 10.011290550231934, "learning_rate": 1.0246964091307435e-05, "loss": 0.7289, "step": 1250 }, { "epoch": 16.0, "eval_accuracy": 0.5094339622641509, "eval_loss": 1.1058666706085205, "eval_runtime": 1.3811, "eval_samples_per_second": 153.497, "eval_steps_per_second": 10.137, "step": 1280 }, { "epoch": 16.25, "grad_norm": 22.63361358642578, "learning_rate": 9.08833851830458e-06, "loss": 0.7104, "step": 1300 }, { "epoch": 16.875, "grad_norm": 4.78521203994751, "learning_rate": 7.97009558756758e-06, "loss": 0.7337, "step": 1350 }, { "epoch": 17.0, "eval_accuracy": 0.5141509433962265, "eval_loss": 1.1084901094436646, "eval_runtime": 1.3844, "eval_samples_per_second": 153.134, "eval_steps_per_second": 10.113, "step": 1360 }, { "epoch": 17.5, "grad_norm": 13.2825288772583, "learning_rate": 6.899874032196796e-06, "loss": 0.7052, "step": 1400 }, { "epoch": 18.0, "eval_accuracy": 0.5188679245283019, "eval_loss": 1.1131378412246704, "eval_runtime": 1.3839, "eval_samples_per_second": 153.192, "eval_steps_per_second": 10.116, "step": 1440 }, { "epoch": 18.125, "grad_norm": 10.307984352111816, "learning_rate": 5.884984550605782e-06, "loss": 0.7075, "step": 1450 }, { "epoch": 18.75, "grad_norm": 12.91500186920166, "learning_rate": 4.93235986703821e-06, "loss": 0.6703, "step": 1500 }, { "epoch": 19.0, "eval_accuracy": 0.5330188679245284, "eval_loss": 1.1068452596664429, "eval_runtime": 1.3783, "eval_samples_per_second": 153.808, "eval_steps_per_second": 10.157, "step": 1520 }, { "epoch": 19.375, "grad_norm": 22.758438110351562, "learning_rate": 4.048507374031557e-06, "loss": 0.6943, "step": 1550 }, { "epoch": 20.0, "grad_norm": 19.057764053344727, "learning_rate": 3.2394646803277063e-06, "loss": 0.6482, "step": 1600 }, { "epoch": 20.0, "eval_accuracy": 0.5188679245283019, "eval_loss": 1.1251068115234375, "eval_runtime": 1.3783, "eval_samples_per_second": 153.811, "eval_steps_per_second": 10.157, "step": 1600 }, { "epoch": 20.625, "grad_norm": 24.637718200683594, "learning_rate": 2.5107583678831445e-06, "loss": 0.6421, "step": 1650 }, { "epoch": 21.0, "eval_accuracy": 0.5283018867924528, "eval_loss": 1.11643385887146, "eval_runtime": 1.4272, "eval_samples_per_second": 148.544, "eval_steps_per_second": 9.81, "step": 1680 }, { "epoch": 21.25, "grad_norm": 18.683311462402344, "learning_rate": 1.867366239710358e-06, "loss": 0.6103, "step": 1700 }, { "epoch": 21.875, "grad_norm": 13.443337440490723, "learning_rate": 1.313683316435793e-06, "loss": 0.6738, "step": 1750 }, { "epoch": 22.0, "eval_accuracy": 0.5377358490566038, "eval_loss": 1.1147156953811646, "eval_runtime": 1.3829, "eval_samples_per_second": 153.297, "eval_steps_per_second": 10.123, "step": 1760 }, { "epoch": 22.5, "grad_norm": 24.684009552001953, "learning_rate": 8.534918138525211e-07, "loss": 0.6459, "step": 1800 }, { "epoch": 23.0, "eval_accuracy": 0.5283018867924528, "eval_loss": 1.1151989698410034, "eval_runtime": 1.381, "eval_samples_per_second": 153.516, "eval_steps_per_second": 10.138, "step": 1840 }, { "epoch": 23.125, "grad_norm": 7.173917293548584, "learning_rate": 4.899353065512263e-07, "loss": 0.6673, "step": 1850 }, { "epoch": 23.75, "grad_norm": 9.602700233459473, "learning_rate": 2.2549725411822485e-07, "loss": 0.6302, "step": 1900 }, { "epoch": 24.0, "eval_accuracy": 0.5283018867924528, "eval_loss": 1.1155682802200317, "eval_runtime": 1.3752, "eval_samples_per_second": 154.158, "eval_steps_per_second": 10.18, "step": 1920 }, { "epoch": 24.375, "grad_norm": 34.81510925292969, "learning_rate": 6.198403658829233e-08, "loss": 0.6049, "step": 1950 }, { "epoch": 25.0, "grad_norm": 22.63814926147461, "learning_rate": 5.126150373813144e-10, "loss": 0.689, "step": 2000 }, { "epoch": 25.0, "eval_accuracy": 0.5283018867924528, "eval_loss": 1.1156727075576782, "eval_runtime": 1.3867, "eval_samples_per_second": 152.879, "eval_steps_per_second": 10.096, "step": 2000 }, { "epoch": 25.0, "step": 2000, "total_flos": 2.1204873264365568e+18, "train_loss": 0.8860943021774292, "train_runtime": 622.5434, "train_samples_per_second": 25.54, "train_steps_per_second": 3.213 } ], "logging_steps": 50, "max_steps": 2000, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.1204873264365568e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }