| { |
| "best_global_step": 5538, |
| "best_metric": 0.8634780589737636, |
| "best_model_checkpoint": "outputs/bartpho-hsd/checkpoint-best/checkpoint-5538", |
| "epoch": 6.0, |
| "eval_steps": 500, |
| "global_step": 5538, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 2.5697989463806152, |
| "learning_rate": 4.998769072267837e-05, |
| "loss": 0.5389, |
| "step": 923 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.8428140236823776, |
| "eval_loss": 0.5179033875465393, |
| "eval_runtime": 60.9906, |
| "eval_samples_per_second": 70.617, |
| "eval_steps_per_second": 2.213, |
| "step": 923 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 3.148050308227539, |
| "learning_rate": 4.995072162589518e-05, |
| "loss": 0.5208, |
| "step": 1846 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.8428140236823776, |
| "eval_loss": 0.5180655121803284, |
| "eval_runtime": 55.4284, |
| "eval_samples_per_second": 77.704, |
| "eval_steps_per_second": 2.436, |
| "step": 1846 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 6.275445461273193, |
| "learning_rate": 4.988912917920435e-05, |
| "loss": 0.4785, |
| "step": 2769 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.850243789180404, |
| "eval_loss": 0.48658695816993713, |
| "eval_runtime": 55.6378, |
| "eval_samples_per_second": 77.411, |
| "eval_steps_per_second": 2.426, |
| "step": 2769 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 2.8800065517425537, |
| "learning_rate": 4.980297416691463e-05, |
| "loss": 0.4072, |
| "step": 3692 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.8379382400742976, |
| "eval_loss": 0.45363402366638184, |
| "eval_runtime": 55.9448, |
| "eval_samples_per_second": 76.987, |
| "eval_steps_per_second": 2.413, |
| "step": 3692 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 2.930692195892334, |
| "learning_rate": 4.969234161362153e-05, |
| "loss": 0.372, |
| "step": 4615 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.8576735546784304, |
| "eval_loss": 0.4497613310813904, |
| "eval_runtime": 55.8766, |
| "eval_samples_per_second": 77.081, |
| "eval_steps_per_second": 2.416, |
| "step": 4615 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 3.390305995941162, |
| "learning_rate": 4.9557340700298316e-05, |
| "loss": 0.345, |
| "step": 5538 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.8634780589737636, |
| "eval_loss": 0.4286285638809204, |
| "eval_runtime": 55.8527, |
| "eval_samples_per_second": 77.114, |
| "eval_steps_per_second": 2.417, |
| "step": 5538 |
| } |
| ], |
| "logging_steps": 923, |
| "max_steps": 92300, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.625341380307149e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|