| { | |
| "best_metric": 0.6927083333333334, | |
| "best_model_checkpoint": "Output_llama3_80-20/checkpoint-1440", | |
| "epoch": 20.0, | |
| "eval_steps": 500, | |
| "global_step": 1920, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.5625, | |
| "eval_balanced_accuracy": 0.5461538461538462, | |
| "eval_loss": 0.6857039928436279, | |
| "eval_runtime": 73.4129, | |
| "eval_samples_per_second": 2.615, | |
| "eval_steps_per_second": 0.327, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.5833333333333334, | |
| "eval_balanced_accuracy": 0.5758314855875832, | |
| "eval_loss": 0.6683754324913025, | |
| "eval_runtime": 104.3141, | |
| "eval_samples_per_second": 1.841, | |
| "eval_steps_per_second": 0.23, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.6197916666666666, | |
| "eval_balanced_accuracy": 0.6383647798742138, | |
| "eval_loss": 0.7166934609413147, | |
| "eval_runtime": 110.585, | |
| "eval_samples_per_second": 1.736, | |
| "eval_steps_per_second": 0.217, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.625, | |
| "eval_balanced_accuracy": 0.6178571428571429, | |
| "eval_loss": 0.6334595084190369, | |
| "eval_runtime": 109.4964, | |
| "eval_samples_per_second": 1.753, | |
| "eval_steps_per_second": 0.219, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.5989583333333334, | |
| "eval_balanced_accuracy": 0.6296716417910448, | |
| "eval_loss": 0.6574041843414307, | |
| "eval_runtime": 89.6626, | |
| "eval_samples_per_second": 2.141, | |
| "eval_steps_per_second": 0.268, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 5.208333333333333, | |
| "grad_norm": 72.83433532714844, | |
| "learning_rate": 7.395833333333335e-06, | |
| "loss": 0.6776, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.625, | |
| "eval_balanced_accuracy": 0.6168117269812186, | |
| "eval_loss": 0.6321956515312195, | |
| "eval_runtime": 106.1665, | |
| "eval_samples_per_second": 1.808, | |
| "eval_steps_per_second": 0.226, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.609375, | |
| "eval_balanced_accuracy": 0.6114369501466275, | |
| "eval_loss": 0.6374137997627258, | |
| "eval_runtime": 107.701, | |
| "eval_samples_per_second": 1.783, | |
| "eval_steps_per_second": 0.223, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.6354166666666666, | |
| "eval_balanced_accuracy": 0.6277777777777778, | |
| "eval_loss": 0.6261330246925354, | |
| "eval_runtime": 107.9832, | |
| "eval_samples_per_second": 1.778, | |
| "eval_steps_per_second": 0.222, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.640625, | |
| "eval_balanced_accuracy": 0.6650742488776332, | |
| "eval_loss": 0.6289492249488831, | |
| "eval_runtime": 111.0235, | |
| "eval_samples_per_second": 1.729, | |
| "eval_steps_per_second": 0.216, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.640625, | |
| "eval_balanced_accuracy": 0.6367950256354314, | |
| "eval_loss": 0.6082468628883362, | |
| "eval_runtime": 96.7719, | |
| "eval_samples_per_second": 1.984, | |
| "eval_steps_per_second": 0.248, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 10.416666666666666, | |
| "grad_norm": 18.0618839263916, | |
| "learning_rate": 4.791666666666668e-06, | |
| "loss": 0.5732, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.6614583333333334, | |
| "eval_balanced_accuracy": 0.6552593256413128, | |
| "eval_loss": 0.603647768497467, | |
| "eval_runtime": 103.3538, | |
| "eval_samples_per_second": 1.858, | |
| "eval_steps_per_second": 0.232, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.6510416666666666, | |
| "eval_balanced_accuracy": 0.6869850746268656, | |
| "eval_loss": 0.6444854140281677, | |
| "eval_runtime": 107.7167, | |
| "eval_samples_per_second": 1.782, | |
| "eval_steps_per_second": 0.223, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.6875, | |
| "eval_balanced_accuracy": 0.6833333333333333, | |
| "eval_loss": 0.6093500852584839, | |
| "eval_runtime": 101.5591, | |
| "eval_samples_per_second": 1.891, | |
| "eval_steps_per_second": 0.236, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.6666666666666666, | |
| "eval_balanced_accuracy": 0.6607142857142857, | |
| "eval_loss": 0.6103670001029968, | |
| "eval_runtime": 113.5782, | |
| "eval_samples_per_second": 1.69, | |
| "eval_steps_per_second": 0.211, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.6927083333333334, | |
| "eval_balanced_accuracy": 0.6959704667751221, | |
| "eval_loss": 0.6553041338920593, | |
| "eval_runtime": 100.5241, | |
| "eval_samples_per_second": 1.91, | |
| "eval_steps_per_second": 0.239, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 15.625, | |
| "grad_norm": 14.87450885772705, | |
| "learning_rate": 2.1875000000000002e-06, | |
| "loss": 0.5144, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.6510416666666666, | |
| "eval_balanced_accuracy": 0.6603078614623419, | |
| "eval_loss": 0.6261806488037109, | |
| "eval_runtime": 106.7804, | |
| "eval_samples_per_second": 1.798, | |
| "eval_steps_per_second": 0.225, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.6666666666666666, | |
| "eval_balanced_accuracy": 0.6619131197893813, | |
| "eval_loss": 0.6154211163520813, | |
| "eval_runtime": 86.3959, | |
| "eval_samples_per_second": 2.222, | |
| "eval_steps_per_second": 0.278, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.6666666666666666, | |
| "eval_balanced_accuracy": 0.6619131197893813, | |
| "eval_loss": 0.6210435032844543, | |
| "eval_runtime": 88.4219, | |
| "eval_samples_per_second": 2.171, | |
| "eval_steps_per_second": 0.271, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.6770833333333334, | |
| "eval_balanced_accuracy": 0.6716186252771619, | |
| "eval_loss": 0.6292756795883179, | |
| "eval_runtime": 87.5484, | |
| "eval_samples_per_second": 2.193, | |
| "eval_steps_per_second": 0.274, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.6614583333333334, | |
| "eval_balanced_accuracy": 0.6563496426608026, | |
| "eval_loss": 0.6274305582046509, | |
| "eval_runtime": 65.5102, | |
| "eval_samples_per_second": 2.931, | |
| "eval_steps_per_second": 0.366, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 1920, | |
| "total_flos": 2.492453077307228e+17, | |
| "train_loss": 0.5602036555608113, | |
| "train_runtime": 27837.1599, | |
| "train_samples_per_second": 0.552, | |
| "train_steps_per_second": 0.069 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 1920, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 10, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 5 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.492453077307228e+17, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |