| { |
| "best_global_step": 820, |
| "best_metric": 0.6167298555374146, |
| "best_model_checkpoint": "distilbert_km_5_v2_qnli/checkpoint-820", |
| "epoch": 7.0, |
| "eval_steps": 500, |
| "global_step": 2870, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 2.2907416820526123, |
| "learning_rate": 4.9e-05, |
| "loss": 0.6594, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.6415888705839282, |
| "eval_loss": 0.6286250352859497, |
| "eval_runtime": 2.2144, |
| "eval_samples_per_second": 2467.024, |
| "eval_steps_per_second": 9.935, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 2.454315662384033, |
| "learning_rate": 4.8e-05, |
| "loss": 0.6047, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.6555006406736226, |
| "eval_loss": 0.6167298555374146, |
| "eval_runtime": 2.203, |
| "eval_samples_per_second": 2479.748, |
| "eval_steps_per_second": 9.986, |
| "step": 820 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 4.699812412261963, |
| "learning_rate": 4.7e-05, |
| "loss": 0.5096, |
| "step": 1230 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.641954969796815, |
| "eval_loss": 0.6637130975723267, |
| "eval_runtime": 2.2347, |
| "eval_samples_per_second": 2444.671, |
| "eval_steps_per_second": 9.845, |
| "step": 1230 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 5.798171043395996, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 0.3911, |
| "step": 1640 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.6624565257184697, |
| "eval_loss": 0.6993908882141113, |
| "eval_runtime": 2.2316, |
| "eval_samples_per_second": 2447.993, |
| "eval_steps_per_second": 9.858, |
| "step": 1640 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 7.124066352844238, |
| "learning_rate": 4.5e-05, |
| "loss": 0.2836, |
| "step": 2050 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.6619073768991397, |
| "eval_loss": 0.8312237858772278, |
| "eval_runtime": 2.1981, |
| "eval_samples_per_second": 2485.376, |
| "eval_steps_per_second": 10.009, |
| "step": 2050 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 8.019088745117188, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.1992, |
| "step": 2460 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.6527548965769724, |
| "eval_loss": 1.0474520921707153, |
| "eval_runtime": 2.2041, |
| "eval_samples_per_second": 2478.606, |
| "eval_steps_per_second": 9.982, |
| "step": 2460 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 9.698943138122559, |
| "learning_rate": 4.3e-05, |
| "loss": 0.1471, |
| "step": 2870 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.6606260296540363, |
| "eval_loss": 1.137702226638794, |
| "eval_runtime": 2.2077, |
| "eval_samples_per_second": 2474.567, |
| "eval_steps_per_second": 9.965, |
| "step": 2870 |
| }, |
| { |
| "epoch": 7.0, |
| "step": 2870, |
| "total_flos": 4.856261458098893e+16, |
| "train_loss": 0.3992488037003042, |
| "train_runtime": 692.1782, |
| "train_samples_per_second": 7566.188, |
| "train_steps_per_second": 29.617 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 20500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 50, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 5 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.856261458098893e+16, |
| "train_batch_size": 256, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|