| { |
| "best_global_step": 820, |
| "best_metric": 0.6383672952651978, |
| "best_model_checkpoint": "distilbert_rand_50_v2_qnli/checkpoint-820", |
| "epoch": 7.0, |
| "eval_steps": 500, |
| "global_step": 2870, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.7452472448349, |
| "learning_rate": 4.9e-05, |
| "loss": 0.6644, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.6225517115138203, |
| "eval_loss": 0.6443765163421631, |
| "eval_runtime": 2.2578, |
| "eval_samples_per_second": 2419.607, |
| "eval_steps_per_second": 9.744, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.5724531412124634, |
| "learning_rate": 4.8e-05, |
| "loss": 0.6252, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.6313380926231008, |
| "eval_loss": 0.6383672952651978, |
| "eval_runtime": 2.2501, |
| "eval_samples_per_second": 2427.852, |
| "eval_steps_per_second": 9.777, |
| "step": 820 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 3.1962156295776367, |
| "learning_rate": 4.7e-05, |
| "loss": 0.56, |
| "step": 1230 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.6267618524620172, |
| "eval_loss": 0.6711590886116028, |
| "eval_runtime": 2.2182, |
| "eval_samples_per_second": 2462.766, |
| "eval_steps_per_second": 9.918, |
| "step": 1230 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 5.182791233062744, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 0.4515, |
| "step": 1640 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.627677100494234, |
| "eval_loss": 0.7168024778366089, |
| "eval_runtime": 2.2331, |
| "eval_samples_per_second": 2446.429, |
| "eval_steps_per_second": 9.852, |
| "step": 1640 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 5.040902137756348, |
| "learning_rate": 4.5e-05, |
| "loss": 0.3311, |
| "step": 2050 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.6300567453779974, |
| "eval_loss": 0.8598495125770569, |
| "eval_runtime": 2.245, |
| "eval_samples_per_second": 2433.445, |
| "eval_steps_per_second": 9.8, |
| "step": 2050 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 9.71996784210205, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.234, |
| "step": 2460 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.6214534138751602, |
| "eval_loss": 1.1353455781936646, |
| "eval_runtime": 2.2452, |
| "eval_samples_per_second": 2433.2, |
| "eval_steps_per_second": 9.799, |
| "step": 2460 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 10.71358585357666, |
| "learning_rate": 4.3e-05, |
| "loss": 0.1707, |
| "step": 2870 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.6148636280431997, |
| "eval_loss": 1.2543296813964844, |
| "eval_runtime": 2.2301, |
| "eval_samples_per_second": 2449.692, |
| "eval_steps_per_second": 9.865, |
| "step": 2870 |
| }, |
| { |
| "epoch": 7.0, |
| "step": 2870, |
| "total_flos": 4.856261458098893e+16, |
| "train_loss": 0.4338497254906631, |
| "train_runtime": 690.2912, |
| "train_samples_per_second": 7586.871, |
| "train_steps_per_second": 29.698 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 20500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 50, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 5 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.856261458098893e+16, |
| "train_batch_size": 256, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|