| { |
| "best_global_step": 6136, |
| "best_metric": 0.7720940709114075, |
| "best_model_checkpoint": "distilbert_km_100_v1_mnli/checkpoint-6136", |
| "epoch": 9.0, |
| "eval_steps": 500, |
| "global_step": 13806, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.0559039115905762, |
| "learning_rate": 4.9e-05, |
| "loss": 0.9885, |
| "step": 1534 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.5736118186449313, |
| "eval_loss": 0.9072005748748779, |
| "eval_runtime": 4.1042, |
| "eval_samples_per_second": 2391.449, |
| "eval_steps_per_second": 9.502, |
| "step": 1534 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.8218348026275635, |
| "learning_rate": 4.8e-05, |
| "loss": 0.8707, |
| "step": 3068 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.6334182373917473, |
| "eval_loss": 0.8225810527801514, |
| "eval_runtime": 3.9519, |
| "eval_samples_per_second": 2483.624, |
| "eval_steps_per_second": 9.869, |
| "step": 3068 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.6593018770217896, |
| "learning_rate": 4.7e-05, |
| "loss": 0.7789, |
| "step": 4602 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.6577687213448803, |
| "eval_loss": 0.7826440930366516, |
| "eval_runtime": 3.945, |
| "eval_samples_per_second": 2487.979, |
| "eval_steps_per_second": 9.886, |
| "step": 4602 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 1.9753878116607666, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 0.7095, |
| "step": 6136 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.6660213958227204, |
| "eval_loss": 0.7720940709114075, |
| "eval_runtime": 3.9402, |
| "eval_samples_per_second": 2490.961, |
| "eval_steps_per_second": 9.898, |
| "step": 6136 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 2.337071657180786, |
| "learning_rate": 4.5e-05, |
| "loss": 0.6453, |
| "step": 7670 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.6733571064696893, |
| "eval_loss": 0.7771408557891846, |
| "eval_runtime": 3.9647, |
| "eval_samples_per_second": 2475.607, |
| "eval_steps_per_second": 9.837, |
| "step": 7670 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 2.6085305213928223, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.5808, |
| "step": 9204 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.6682628629648497, |
| "eval_loss": 0.8248651027679443, |
| "eval_runtime": 3.9573, |
| "eval_samples_per_second": 2480.236, |
| "eval_steps_per_second": 9.855, |
| "step": 9204 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 2.5376288890838623, |
| "learning_rate": 4.3e-05, |
| "loss": 0.5166, |
| "step": 10738 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.6683647478349465, |
| "eval_loss": 0.8570982217788696, |
| "eval_runtime": 3.972, |
| "eval_samples_per_second": 2471.072, |
| "eval_steps_per_second": 9.819, |
| "step": 10738 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 2.901345729827881, |
| "learning_rate": 4.2e-05, |
| "loss": 0.4507, |
| "step": 12272 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.6682628629648497, |
| "eval_loss": 0.9123534560203552, |
| "eval_runtime": 3.9523, |
| "eval_samples_per_second": 2483.358, |
| "eval_steps_per_second": 9.868, |
| "step": 12272 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 3.4146969318389893, |
| "learning_rate": 4.1e-05, |
| "loss": 0.3906, |
| "step": 13806 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.6649006622516557, |
| "eval_loss": 1.0756607055664062, |
| "eval_runtime": 3.9483, |
| "eval_samples_per_second": 2485.892, |
| "eval_steps_per_second": 9.878, |
| "step": 13806 |
| }, |
| { |
| "epoch": 9.0, |
| "step": 13806, |
| "total_flos": 2.340951304214108e+17, |
| "train_loss": 0.6590785060121618, |
| "train_runtime": 3235.5798, |
| "train_samples_per_second": 6068.495, |
| "train_steps_per_second": 23.705 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 76700, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 50, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 5 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.340951304214108e+17, |
| "train_batch_size": 256, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|