| { | |
| "best_metric": 0.6791029572486877, | |
| "best_model_checkpoint": "./results/checkpoint-342", | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 684, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.7781569965870307, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 58, | |
| 0, | |
| 1, | |
| 1, | |
| 10, | |
| 9, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 81, | |
| 0, | |
| 0, | |
| 4, | |
| 2, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 87, | |
| 0, | |
| 2, | |
| 6, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 0, | |
| 67, | |
| 5, | |
| 2, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 3, | |
| 4, | |
| 69, | |
| 6, | |
| 4 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 6, | |
| 1, | |
| 12, | |
| 56, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 0, | |
| 4, | |
| 31, | |
| 11, | |
| 38 | |
| ] | |
| ], | |
| "eval_f1": 0.7777605036882297, | |
| "eval_loss": 0.6863528490066528, | |
| "eval_precision": 0.8141265117353299, | |
| "eval_recall": 0.7752230960697254, | |
| "eval_runtime": 414.2099, | |
| "eval_samples_per_second": 1.415, | |
| "eval_steps_per_second": 0.179, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.8003412969283277, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 72, | |
| 0, | |
| 0, | |
| 1, | |
| 4, | |
| 2, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 84, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 3 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 86, | |
| 0, | |
| 3, | |
| 4, | |
| 0 | |
| ], | |
| [ | |
| 7, | |
| 0, | |
| 0, | |
| 65, | |
| 2, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 10, | |
| 1, | |
| 2, | |
| 2, | |
| 53, | |
| 8, | |
| 11 | |
| ], | |
| [ | |
| 5, | |
| 0, | |
| 10, | |
| 1, | |
| 9, | |
| 51, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 0, | |
| 3, | |
| 17, | |
| 5, | |
| 58 | |
| ] | |
| ], | |
| "eval_f1": 0.7977110437689846, | |
| "eval_loss": 0.6791029572486877, | |
| "eval_precision": 0.8015628505641682, | |
| "eval_recall": 0.7980677533763562, | |
| "eval_runtime": 417.5215, | |
| "eval_samples_per_second": 1.404, | |
| "eval_steps_per_second": 0.177, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 0.1340806484222412, | |
| "learning_rate": 5.3801169590643275e-05, | |
| "loss": 0.3724, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8225255972696246, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 72, | |
| 0, | |
| 1, | |
| 0, | |
| 3, | |
| 3, | |
| 1 | |
| ], | |
| [ | |
| 1, | |
| 81, | |
| 1, | |
| 0, | |
| 2, | |
| 0, | |
| 4 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 86, | |
| 0, | |
| 0, | |
| 8, | |
| 0 | |
| ], | |
| [ | |
| 5, | |
| 0, | |
| 0, | |
| 67, | |
| 1, | |
| 2, | |
| 0 | |
| ], | |
| [ | |
| 10, | |
| 0, | |
| 3, | |
| 4, | |
| 47, | |
| 5, | |
| 18 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 5, | |
| 1, | |
| 1, | |
| 67, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 1, | |
| 3, | |
| 11, | |
| 5, | |
| 62 | |
| ] | |
| ], | |
| "eval_f1": 0.819196982637564, | |
| "eval_loss": 0.805719256401062, | |
| "eval_precision": 0.821570223887049, | |
| "eval_recall": 0.8240875601856804, | |
| "eval_runtime": 415.3032, | |
| "eval_samples_per_second": 1.411, | |
| "eval_steps_per_second": 0.178, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.8242320819112628, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 71, | |
| 0, | |
| 0, | |
| 1, | |
| 3, | |
| 4, | |
| 1 | |
| ], | |
| [ | |
| 1, | |
| 82, | |
| 1, | |
| 0, | |
| 2, | |
| 1, | |
| 2 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 85, | |
| 0, | |
| 1, | |
| 8, | |
| 0 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 0, | |
| 67, | |
| 3, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 5, | |
| 0, | |
| 2, | |
| 6, | |
| 50, | |
| 4, | |
| 20 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 5, | |
| 1, | |
| 2, | |
| 66, | |
| 1 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 0, | |
| 3, | |
| 15, | |
| 3, | |
| 62 | |
| ] | |
| ], | |
| "eval_f1": 0.8226354635422165, | |
| "eval_loss": 0.7115229368209839, | |
| "eval_precision": 0.8223101749006074, | |
| "eval_recall": 0.8254496320643309, | |
| "eval_runtime": 409.1206, | |
| "eval_samples_per_second": 1.432, | |
| "eval_steps_per_second": 0.181, | |
| "step": 684 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 684, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "total_flos": 4.234352059828961e+17, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |