{ "best_metric": 0.5377358490566038, "best_model_checkpoint": "./Validated_Balanced_Raw_Data_model_boost6_outputs/checkpoint-640", "epoch": 20.0, "eval_steps": 500, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.625, "grad_norm": 10.34670639038086, "learning_rate": 6.25e-06, "loss": 1.3904, "step": 50 }, { "epoch": 1.0, "eval_accuracy": 0.36792452830188677, "eval_loss": 1.330020546913147, "eval_runtime": 1.7586, "eval_samples_per_second": 120.547, "eval_steps_per_second": 15.353, "step": 80 }, { "epoch": 1.25, "grad_norm": 19.34177589416504, "learning_rate": 1.25e-05, "loss": 1.3468, "step": 100 }, { "epoch": 1.875, "grad_norm": 38.68441390991211, "learning_rate": 1.8750000000000002e-05, "loss": 1.3255, "step": 150 }, { "epoch": 2.0, "eval_accuracy": 0.3867924528301887, "eval_loss": 1.2852892875671387, "eval_runtime": 1.7689, "eval_samples_per_second": 119.847, "eval_steps_per_second": 15.264, "step": 160 }, { "epoch": 2.5, "grad_norm": 24.609846115112305, "learning_rate": 1.9961946980917457e-05, "loss": 1.2779, "step": 200 }, { "epoch": 3.0, "eval_accuracy": 0.4339622641509434, "eval_loss": 1.2196338176727295, "eval_runtime": 1.7492, "eval_samples_per_second": 121.197, "eval_steps_per_second": 15.435, "step": 240 }, { "epoch": 3.125, "grad_norm": 36.32752990722656, "learning_rate": 1.9807852804032306e-05, "loss": 1.258, "step": 250 }, { "epoch": 3.75, "grad_norm": 12.139269828796387, "learning_rate": 1.953716950748227e-05, "loss": 1.2267, "step": 300 }, { "epoch": 4.0, "eval_accuracy": 0.4528301886792453, "eval_loss": 1.1913784742355347, "eval_runtime": 1.7493, "eval_samples_per_second": 121.191, "eval_steps_per_second": 15.435, "step": 320 }, { "epoch": 4.375, "grad_norm": 15.388509750366211, "learning_rate": 1.9153114791194475e-05, "loss": 1.1845, "step": 350 }, { "epoch": 5.0, "grad_norm": 39.35362243652344, "learning_rate": 1.866025403784439e-05, "loss": 1.1508, "step": 400 }, { "epoch": 5.0, "eval_accuracy": 0.5047169811320755, "eval_loss": 1.1553481817245483, "eval_runtime": 1.7581, "eval_samples_per_second": 120.586, "eval_steps_per_second": 15.358, "step": 400 }, { "epoch": 5.625, "grad_norm": 20.53506088256836, "learning_rate": 1.806444604267483e-05, "loss": 1.0964, "step": 450 }, { "epoch": 6.0, "eval_accuracy": 0.47641509433962265, "eval_loss": 1.2144731283187866, "eval_runtime": 1.7625, "eval_samples_per_second": 120.285, "eval_steps_per_second": 15.319, "step": 480 }, { "epoch": 6.25, "grad_norm": 16.791868209838867, "learning_rate": 1.737277336810124e-05, "loss": 1.0929, "step": 500 }, { "epoch": 6.875, "grad_norm": 17.93596076965332, "learning_rate": 1.659345815100069e-05, "loss": 1.0742, "step": 550 }, { "epoch": 7.0, "eval_accuracy": 0.5, "eval_loss": 1.1813852787017822, "eval_runtime": 1.767, "eval_samples_per_second": 119.974, "eval_steps_per_second": 15.28, "step": 560 }, { "epoch": 7.5, "grad_norm": 28.824607849121094, "learning_rate": 1.573576436351046e-05, "loss": 1.0315, "step": 600 }, { "epoch": 8.0, "eval_accuracy": 0.5377358490566038, "eval_loss": 1.1222487688064575, "eval_runtime": 1.7728, "eval_samples_per_second": 119.585, "eval_steps_per_second": 15.23, "step": 640 }, { "epoch": 8.125, "grad_norm": 17.935773849487305, "learning_rate": 1.4809887689193878e-05, "loss": 1.0158, "step": 650 }, { "epoch": 8.75, "grad_norm": 19.609621047973633, "learning_rate": 1.3826834323650899e-05, "loss": 1.0283, "step": 700 }, { "epoch": 9.0, "eval_accuracy": 0.5188679245283019, "eval_loss": 1.1560615301132202, "eval_runtime": 1.7551, "eval_samples_per_second": 120.791, "eval_steps_per_second": 15.384, "step": 720 }, { "epoch": 9.375, "grad_norm": 19.259183883666992, "learning_rate": 1.2798290140309924e-05, "loss": 0.9402, "step": 750 }, { "epoch": 10.0, "grad_norm": 30.109878540039062, "learning_rate": 1.1736481776669307e-05, "loss": 0.999, "step": 800 }, { "epoch": 10.0, "eval_accuracy": 0.5094339622641509, "eval_loss": 1.1939640045166016, "eval_runtime": 1.7667, "eval_samples_per_second": 119.998, "eval_steps_per_second": 15.283, "step": 800 }, { "epoch": 10.625, "grad_norm": 35.87038803100586, "learning_rate": 1.0654031292301432e-05, "loss": 0.961, "step": 850 }, { "epoch": 11.0, "eval_accuracy": 0.5047169811320755, "eval_loss": 1.143951654434204, "eval_runtime": 1.7675, "eval_samples_per_second": 119.943, "eval_steps_per_second": 15.276, "step": 880 }, { "epoch": 11.25, "grad_norm": 31.919300079345703, "learning_rate": 9.563806126346643e-06, "loss": 0.9398, "step": 900 }, { "epoch": 11.875, "grad_norm": 18.056291580200195, "learning_rate": 8.478766138100834e-06, "loss": 0.9484, "step": 950 }, { "epoch": 12.0, "eval_accuracy": 0.5, "eval_loss": 1.1715978384017944, "eval_runtime": 1.7628, "eval_samples_per_second": 120.264, "eval_steps_per_second": 15.317, "step": 960 }, { "epoch": 12.5, "grad_norm": 28.943151473999023, "learning_rate": 7.411809548974792e-06, "loss": 0.8779, "step": 1000 }, { "epoch": 13.0, "eval_accuracy": 0.5141509433962265, "eval_loss": 1.1549073457717896, "eval_runtime": 1.75, "eval_samples_per_second": 121.143, "eval_steps_per_second": 15.429, "step": 1040 }, { "epoch": 13.125, "grad_norm": 13.596811294555664, "learning_rate": 6.375619617162985e-06, "loss": 0.8824, "step": 1050 }, { "epoch": 13.75, "grad_norm": 21.678871154785156, "learning_rate": 5.382513867649663e-06, "loss": 0.8613, "step": 1100 }, { "epoch": 14.0, "eval_accuracy": 0.5283018867924528, "eval_loss": 1.1524059772491455, "eval_runtime": 1.7481, "eval_samples_per_second": 121.275, "eval_steps_per_second": 15.445, "step": 1120 }, { "epoch": 14.375, "grad_norm": 33.053497314453125, "learning_rate": 4.444297669803981e-06, "loss": 0.8734, "step": 1150 }, { "epoch": 15.0, "grad_norm": 30.25782012939453, "learning_rate": 3.5721239031346067e-06, "loss": 0.8572, "step": 1200 }, { "epoch": 15.0, "eval_accuracy": 0.5188679245283019, "eval_loss": 1.1643953323364258, "eval_runtime": 1.749, "eval_samples_per_second": 121.212, "eval_steps_per_second": 15.437, "step": 1200 }, { "epoch": 15.625, "grad_norm": 31.462533950805664, "learning_rate": 2.776360379402445e-06, "loss": 0.8605, "step": 1250 }, { "epoch": 16.0, "eval_accuracy": 0.5235849056603774, "eval_loss": 1.1535687446594238, "eval_runtime": 1.7604, "eval_samples_per_second": 120.424, "eval_steps_per_second": 15.337, "step": 1280 }, { "epoch": 16.25, "grad_norm": 23.692352294921875, "learning_rate": 2.0664665970876496e-06, "loss": 0.7938, "step": 1300 }, { "epoch": 16.875, "grad_norm": 31.5648250579834, "learning_rate": 1.4508812932705364e-06, "loss": 0.8268, "step": 1350 }, { "epoch": 17.0, "eval_accuracy": 0.5283018867924528, "eval_loss": 1.1561096906661987, "eval_runtime": 1.747, "eval_samples_per_second": 121.354, "eval_steps_per_second": 15.455, "step": 1360 }, { "epoch": 17.5, "grad_norm": 15.69443130493164, "learning_rate": 9.369221296335007e-07, "loss": 0.8171, "step": 1400 }, { "epoch": 18.0, "eval_accuracy": 0.5283018867924528, "eval_loss": 1.158867597579956, "eval_runtime": 1.7457, "eval_samples_per_second": 121.443, "eval_steps_per_second": 15.467, "step": 1440 }, { "epoch": 18.125, "grad_norm": 17.494001388549805, "learning_rate": 5.306987050489442e-07, "loss": 0.8575, "step": 1450 }, { "epoch": 18.75, "grad_norm": 42.6074104309082, "learning_rate": 2.370399288006664e-07, "loss": 0.8242, "step": 1500 }, { "epoch": 19.0, "eval_accuracy": 0.5235849056603774, "eval_loss": 1.1594204902648926, "eval_runtime": 1.7582, "eval_samples_per_second": 120.576, "eval_steps_per_second": 15.356, "step": 1520 }, { "epoch": 19.375, "grad_norm": 31.484249114990234, "learning_rate": 5.943661777680354e-08, "loss": 0.8017, "step": 1550 }, { "epoch": 20.0, "grad_norm": 31.914243698120117, "learning_rate": 0.0, "loss": 0.7743, "step": 1600 }, { "epoch": 20.0, "eval_accuracy": 0.5283018867924528, "eval_loss": 1.1595797538757324, "eval_runtime": 3.0827, "eval_samples_per_second": 68.772, "eval_steps_per_second": 8.759, "step": 1600 }, { "epoch": 20.0, "step": 1600, "total_flos": 1.0060366441866854e+18, "train_loss": 1.006125464439392, "train_runtime": 409.8605, "train_samples_per_second": 31.035, "train_steps_per_second": 3.904 } ], "logging_steps": 50, "max_steps": 1600, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0060366441866854e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }