{
  "best_metric": 0.5377358490566038,
  "best_model_checkpoint": "./Validated_Balanced_Raw_Data_model_boost6_outputs/checkpoint-640",
  "epoch": 20.0,
  "eval_steps": 500,
  "global_step": 1600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.625,
      "grad_norm": 10.34670639038086,
      "learning_rate": 6.25e-06,
      "loss": 1.3904,
      "step": 50
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.36792452830188677,
      "eval_loss": 1.330020546913147,
      "eval_runtime": 1.7586,
      "eval_samples_per_second": 120.547,
      "eval_steps_per_second": 15.353,
      "step": 80
    },
    {
      "epoch": 1.25,
      "grad_norm": 19.34177589416504,
      "learning_rate": 1.25e-05,
      "loss": 1.3468,
      "step": 100
    },
    {
      "epoch": 1.875,
      "grad_norm": 38.68441390991211,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 1.3255,
      "step": 150
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.3867924528301887,
      "eval_loss": 1.2852892875671387,
      "eval_runtime": 1.7689,
      "eval_samples_per_second": 119.847,
      "eval_steps_per_second": 15.264,
      "step": 160
    },
    {
      "epoch": 2.5,
      "grad_norm": 24.609846115112305,
      "learning_rate": 1.9961946980917457e-05,
      "loss": 1.2779,
      "step": 200
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.4339622641509434,
      "eval_loss": 1.2196338176727295,
      "eval_runtime": 1.7492,
      "eval_samples_per_second": 121.197,
      "eval_steps_per_second": 15.435,
      "step": 240
    },
    {
      "epoch": 3.125,
      "grad_norm": 36.32752990722656,
      "learning_rate": 1.9807852804032306e-05,
      "loss": 1.258,
      "step": 250
    },
    {
      "epoch": 3.75,
      "grad_norm": 12.139269828796387,
      "learning_rate": 1.953716950748227e-05,
      "loss": 1.2267,
      "step": 300
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.4528301886792453,
      "eval_loss": 1.1913784742355347,
      "eval_runtime": 1.7493,
      "eval_samples_per_second": 121.191,
      "eval_steps_per_second": 15.435,
      "step": 320
    },
    {
      "epoch": 4.375,
      "grad_norm": 15.388509750366211,
      "learning_rate": 1.9153114791194475e-05,
      "loss": 1.1845,
      "step": 350
    },
    {
      "epoch": 5.0,
      "grad_norm": 39.35362243652344,
      "learning_rate": 1.866025403784439e-05,
      "loss": 1.1508,
      "step": 400
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.5047169811320755,
      "eval_loss": 1.1553481817245483,
      "eval_runtime": 1.7581,
      "eval_samples_per_second": 120.586,
      "eval_steps_per_second": 15.358,
      "step": 400
    },
    {
      "epoch": 5.625,
      "grad_norm": 20.53506088256836,
      "learning_rate": 1.806444604267483e-05,
      "loss": 1.0964,
      "step": 450
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.47641509433962265,
      "eval_loss": 1.2144731283187866,
      "eval_runtime": 1.7625,
      "eval_samples_per_second": 120.285,
      "eval_steps_per_second": 15.319,
      "step": 480
    },
    {
      "epoch": 6.25,
      "grad_norm": 16.791868209838867,
      "learning_rate": 1.737277336810124e-05,
      "loss": 1.0929,
      "step": 500
    },
    {
      "epoch": 6.875,
      "grad_norm": 17.93596076965332,
      "learning_rate": 1.659345815100069e-05,
      "loss": 1.0742,
      "step": 550
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.5,
      "eval_loss": 1.1813852787017822,
      "eval_runtime": 1.767,
      "eval_samples_per_second": 119.974,
      "eval_steps_per_second": 15.28,
      "step": 560
    },
    {
      "epoch": 7.5,
      "grad_norm": 28.824607849121094,
      "learning_rate": 1.573576436351046e-05,
      "loss": 1.0315,
      "step": 600
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.5377358490566038,
      "eval_loss": 1.1222487688064575,
      "eval_runtime": 1.7728,
      "eval_samples_per_second": 119.585,
      "eval_steps_per_second": 15.23,
      "step": 640
    },
    {
      "epoch": 8.125,
      "grad_norm": 17.935773849487305,
      "learning_rate": 1.4809887689193878e-05,
      "loss": 1.0158,
      "step": 650
    },
    {
      "epoch": 8.75,
      "grad_norm": 19.609621047973633,
      "learning_rate": 1.3826834323650899e-05,
      "loss": 1.0283,
      "step": 700
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.5188679245283019,
      "eval_loss": 1.1560615301132202,
      "eval_runtime": 1.7551,
      "eval_samples_per_second": 120.791,
      "eval_steps_per_second": 15.384,
      "step": 720
    },
    {
      "epoch": 9.375,
      "grad_norm": 19.259183883666992,
      "learning_rate": 1.2798290140309924e-05,
      "loss": 0.9402,
      "step": 750
    },
    {
      "epoch": 10.0,
      "grad_norm": 30.109878540039062,
      "learning_rate": 1.1736481776669307e-05,
      "loss": 0.999,
      "step": 800
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.5094339622641509,
      "eval_loss": 1.1939640045166016,
      "eval_runtime": 1.7667,
      "eval_samples_per_second": 119.998,
      "eval_steps_per_second": 15.283,
      "step": 800
    },
    {
      "epoch": 10.625,
      "grad_norm": 35.87038803100586,
      "learning_rate": 1.0654031292301432e-05,
      "loss": 0.961,
      "step": 850
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.5047169811320755,
      "eval_loss": 1.143951654434204,
      "eval_runtime": 1.7675,
      "eval_samples_per_second": 119.943,
      "eval_steps_per_second": 15.276,
      "step": 880
    },
    {
      "epoch": 11.25,
      "grad_norm": 31.919300079345703,
      "learning_rate": 9.563806126346643e-06,
      "loss": 0.9398,
      "step": 900
    },
    {
      "epoch": 11.875,
      "grad_norm": 18.056291580200195,
      "learning_rate": 8.478766138100834e-06,
      "loss": 0.9484,
      "step": 950
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.5,
      "eval_loss": 1.1715978384017944,
      "eval_runtime": 1.7628,
      "eval_samples_per_second": 120.264,
      "eval_steps_per_second": 15.317,
      "step": 960
    },
    {
      "epoch": 12.5,
      "grad_norm": 28.943151473999023,
      "learning_rate": 7.411809548974792e-06,
      "loss": 0.8779,
      "step": 1000
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.5141509433962265,
      "eval_loss": 1.1549073457717896,
      "eval_runtime": 1.75,
      "eval_samples_per_second": 121.143,
      "eval_steps_per_second": 15.429,
      "step": 1040
    },
    {
      "epoch": 13.125,
      "grad_norm": 13.596811294555664,
      "learning_rate": 6.375619617162985e-06,
      "loss": 0.8824,
      "step": 1050
    },
    {
      "epoch": 13.75,
      "grad_norm": 21.678871154785156,
      "learning_rate": 5.382513867649663e-06,
      "loss": 0.8613,
      "step": 1100
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.5283018867924528,
      "eval_loss": 1.1524059772491455,
      "eval_runtime": 1.7481,
      "eval_samples_per_second": 121.275,
      "eval_steps_per_second": 15.445,
      "step": 1120
    },
    {
      "epoch": 14.375,
      "grad_norm": 33.053497314453125,
      "learning_rate": 4.444297669803981e-06,
      "loss": 0.8734,
      "step": 1150
    },
    {
      "epoch": 15.0,
      "grad_norm": 30.25782012939453,
      "learning_rate": 3.5721239031346067e-06,
      "loss": 0.8572,
      "step": 1200
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.5188679245283019,
      "eval_loss": 1.1643953323364258,
      "eval_runtime": 1.749,
      "eval_samples_per_second": 121.212,
      "eval_steps_per_second": 15.437,
      "step": 1200
    },
    {
      "epoch": 15.625,
      "grad_norm": 31.462533950805664,
      "learning_rate": 2.776360379402445e-06,
      "loss": 0.8605,
      "step": 1250
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.5235849056603774,
      "eval_loss": 1.1535687446594238,
      "eval_runtime": 1.7604,
      "eval_samples_per_second": 120.424,
      "eval_steps_per_second": 15.337,
      "step": 1280
    },
    {
      "epoch": 16.25,
      "grad_norm": 23.692352294921875,
      "learning_rate": 2.0664665970876496e-06,
      "loss": 0.7938,
      "step": 1300
    },
    {
      "epoch": 16.875,
      "grad_norm": 31.5648250579834,
      "learning_rate": 1.4508812932705364e-06,
      "loss": 0.8268,
      "step": 1350
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.5283018867924528,
      "eval_loss": 1.1561096906661987,
      "eval_runtime": 1.747,
      "eval_samples_per_second": 121.354,
      "eval_steps_per_second": 15.455,
      "step": 1360
    },
    {
      "epoch": 17.5,
      "grad_norm": 15.69443130493164,
      "learning_rate": 9.369221296335007e-07,
      "loss": 0.8171,
      "step": 1400
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.5283018867924528,
      "eval_loss": 1.158867597579956,
      "eval_runtime": 1.7457,
      "eval_samples_per_second": 121.443,
      "eval_steps_per_second": 15.467,
      "step": 1440
    },
    {
      "epoch": 18.125,
      "grad_norm": 17.494001388549805,
      "learning_rate": 5.306987050489442e-07,
      "loss": 0.8575,
      "step": 1450
    },
    {
      "epoch": 18.75,
      "grad_norm": 42.6074104309082,
      "learning_rate": 2.370399288006664e-07,
      "loss": 0.8242,
      "step": 1500
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.5235849056603774,
      "eval_loss": 1.1594204902648926,
      "eval_runtime": 1.7582,
      "eval_samples_per_second": 120.576,
      "eval_steps_per_second": 15.356,
      "step": 1520
    },
    {
      "epoch": 19.375,
      "grad_norm": 31.484249114990234,
      "learning_rate": 5.943661777680354e-08,
      "loss": 0.8017,
      "step": 1550
    },
    {
      "epoch": 20.0,
      "grad_norm": 31.914243698120117,
      "learning_rate": 0.0,
      "loss": 0.7743,
      "step": 1600
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.5283018867924528,
      "eval_loss": 1.1595797538757324,
      "eval_runtime": 3.0827,
      "eval_samples_per_second": 68.772,
      "eval_steps_per_second": 8.759,
      "step": 1600
    },
    {
      "epoch": 20.0,
      "step": 1600,
      "total_flos": 1.0060366441866854e+18,
      "train_loss": 1.006125464439392,
      "train_runtime": 409.8605,
      "train_samples_per_second": 31.035,
      "train_steps_per_second": 3.904
    }
  ],
  "logging_steps": 50,
  "max_steps": 1600,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0060366441866854e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}