{ "best_metric": 0.8055555555555556, "best_model_checkpoint": "test/checkpoint-6", "epoch": 19.727272727272727, "global_step": 40, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.73, "eval_accuracy": 0.6388888888888888, "eval_f1": 0.6477582846003898, "eval_loss": 0.679732084274292, "eval_precision": 0.6572420634920635, "eval_recall": 0.6388888888888888, "eval_runtime": 5.9427, "eval_samples_per_second": 6.058, "eval_steps_per_second": 0.168, "step": 2 }, { "epoch": 1.73, "eval_accuracy": 0.75, "eval_f1": 0.6904761904761904, "eval_loss": 0.6750033497810364, "eval_precision": 0.6397058823529411, "eval_recall": 0.75, "eval_runtime": 4.9005, "eval_samples_per_second": 7.346, "eval_steps_per_second": 0.204, "step": 4 }, { "epoch": 2.73, "eval_accuracy": 0.8055555555555556, "eval_f1": 0.7188034188034188, "eval_loss": 0.6533546447753906, "eval_precision": 0.6489197530864197, "eval_recall": 0.8055555555555556, "eval_runtime": 4.9351, "eval_samples_per_second": 7.295, "eval_steps_per_second": 0.203, "step": 6 }, { "epoch": 3.73, "eval_accuracy": 0.8055555555555556, "eval_f1": 0.7188034188034188, "eval_loss": 0.6294739842414856, "eval_precision": 0.6489197530864197, "eval_recall": 0.8055555555555556, "eval_runtime": 4.7154, "eval_samples_per_second": 7.635, "eval_steps_per_second": 0.212, "step": 8 }, { "epoch": 4.73, "learning_rate": 0.0001, "loss": 0.8452, "step": 10 }, { "epoch": 4.73, "eval_accuracy": 0.8055555555555556, "eval_f1": 0.7188034188034188, "eval_loss": 0.6180709600448608, "eval_precision": 0.6489197530864197, "eval_recall": 0.8055555555555556, "eval_runtime": 5.3103, "eval_samples_per_second": 6.779, "eval_steps_per_second": 0.188, "step": 10 }, { "epoch": 5.73, "eval_accuracy": 0.8055555555555556, "eval_f1": 0.7188034188034188, "eval_loss": 0.6019383668899536, "eval_precision": 0.6489197530864197, "eval_recall": 0.8055555555555556, "eval_runtime": 4.7577, "eval_samples_per_second": 7.567, "eval_steps_per_second": 0.21, "step": 12 }, { "epoch": 6.73, "eval_accuracy": 0.8055555555555556, "eval_f1": 0.7188034188034188, "eval_loss": 0.6006457805633545, "eval_precision": 0.6489197530864197, "eval_recall": 0.8055555555555556, "eval_runtime": 4.7756, "eval_samples_per_second": 7.538, "eval_steps_per_second": 0.209, "step": 14 }, { "epoch": 7.73, "eval_accuracy": 0.8055555555555556, "eval_f1": 0.7188034188034188, "eval_loss": 0.584420919418335, "eval_precision": 0.6489197530864197, "eval_recall": 0.8055555555555556, "eval_runtime": 5.7897, "eval_samples_per_second": 6.218, "eval_steps_per_second": 0.173, "step": 16 }, { "epoch": 8.73, "eval_accuracy": 0.8055555555555556, "eval_f1": 0.7188034188034188, "eval_loss": 0.5822530388832092, "eval_precision": 0.6489197530864197, "eval_recall": 0.8055555555555556, "eval_runtime": 4.7388, "eval_samples_per_second": 7.597, "eval_steps_per_second": 0.211, "step": 18 }, { "epoch": 9.73, "learning_rate": 6.666666666666667e-05, "loss": 0.7824, "step": 20 }, { "epoch": 9.73, "eval_accuracy": 0.8055555555555556, "eval_f1": 0.7188034188034188, "eval_loss": 0.5739169716835022, "eval_precision": 0.6489197530864197, "eval_recall": 0.8055555555555556, "eval_runtime": 4.8104, "eval_samples_per_second": 7.484, "eval_steps_per_second": 0.208, "step": 20 }, { "epoch": 10.73, "eval_accuracy": 0.8055555555555556, "eval_f1": 0.7188034188034188, "eval_loss": 0.5697076916694641, "eval_precision": 0.6489197530864197, "eval_recall": 0.8055555555555556, "eval_runtime": 4.6974, "eval_samples_per_second": 7.664, "eval_steps_per_second": 0.213, "step": 22 }, { "epoch": 11.73, "eval_accuracy": 0.8055555555555556, "eval_f1": 0.7188034188034188, "eval_loss": 0.5695454478263855, "eval_precision": 0.6489197530864197, "eval_recall": 0.8055555555555556, "eval_runtime": 4.9353, "eval_samples_per_second": 7.294, "eval_steps_per_second": 0.203, "step": 24 }, { "epoch": 12.73, "eval_accuracy": 0.8055555555555556, "eval_f1": 0.7188034188034188, "eval_loss": 0.5626423954963684, "eval_precision": 0.6489197530864197, "eval_recall": 0.8055555555555556, "eval_runtime": 5.8521, "eval_samples_per_second": 6.152, "eval_steps_per_second": 0.171, "step": 26 }, { "epoch": 13.73, "eval_accuracy": 0.8055555555555556, "eval_f1": 0.7188034188034188, "eval_loss": 0.5666201710700989, "eval_precision": 0.6489197530864197, "eval_recall": 0.8055555555555556, "eval_runtime": 5.1217, "eval_samples_per_second": 7.029, "eval_steps_per_second": 0.195, "step": 28 }, { "epoch": 14.73, "learning_rate": 3.3333333333333335e-05, "loss": 0.7149, "step": 30 }, { "epoch": 14.73, "eval_accuracy": 0.8055555555555556, "eval_f1": 0.7188034188034188, "eval_loss": 0.5559208989143372, "eval_precision": 0.6489197530864197, "eval_recall": 0.8055555555555556, "eval_runtime": 5.1084, "eval_samples_per_second": 7.047, "eval_steps_per_second": 0.196, "step": 30 }, { "epoch": 15.73, "eval_accuracy": 0.8055555555555556, "eval_f1": 0.7188034188034188, "eval_loss": 0.5556015372276306, "eval_precision": 0.6489197530864197, "eval_recall": 0.8055555555555556, "eval_runtime": 6.0749, "eval_samples_per_second": 5.926, "eval_steps_per_second": 0.165, "step": 32 }, { "epoch": 16.73, "eval_accuracy": 0.8055555555555556, "eval_f1": 0.7188034188034188, "eval_loss": 0.5465428233146667, "eval_precision": 0.6489197530864197, "eval_recall": 0.8055555555555556, "eval_runtime": 4.8659, "eval_samples_per_second": 7.398, "eval_steps_per_second": 0.206, "step": 34 }, { "epoch": 17.73, "eval_accuracy": 0.8055555555555556, "eval_f1": 0.7188034188034188, "eval_loss": 0.5558464527130127, "eval_precision": 0.6489197530864197, "eval_recall": 0.8055555555555556, "eval_runtime": 4.8604, "eval_samples_per_second": 7.407, "eval_steps_per_second": 0.206, "step": 36 }, { "epoch": 18.73, "eval_accuracy": 0.8055555555555556, "eval_f1": 0.7188034188034188, "eval_loss": 0.5543624758720398, "eval_precision": 0.6489197530864197, "eval_recall": 0.8055555555555556, "eval_runtime": 4.7553, "eval_samples_per_second": 7.57, "eval_steps_per_second": 0.21, "step": 38 }, { "epoch": 19.73, "learning_rate": 0.0, "loss": 0.6869, "step": 40 }, { "epoch": 19.73, "eval_accuracy": 0.8055555555555556, "eval_f1": 0.7188034188034188, "eval_loss": 0.5504012107849121, "eval_precision": 0.6489197530864197, "eval_recall": 0.8055555555555556, "eval_runtime": 5.3116, "eval_samples_per_second": 6.778, "eval_steps_per_second": 0.188, "step": 40 }, { "epoch": 19.73, "step": 40, "total_flos": 2.8684782950326272e+17, "train_loss": 0.7573485970497131, "train_runtime": 850.4827, "train_samples_per_second": 16.085, "train_steps_per_second": 0.047 } ], "max_steps": 40, "num_train_epochs": 20, "total_flos": 2.8684782950326272e+17, "trial_name": null, "trial_params": null }