{ "best_global_step": 30, "best_metric": 0.6790599226951599, "best_model_checkpoint": "tinybert_base_train_kd_rte/checkpoint-30", "epoch": 8.0, "eval_steps": 500, "global_step": 80, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.5859740972518921, "learning_rate": 4.91e-05, "loss": 0.705, "step": 10 }, { "epoch": 1.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.691801130771637, "eval_runtime": 0.1452, "eval_samples_per_second": 1907.112, "eval_steps_per_second": 13.77, "step": 10 }, { "epoch": 2.0, "grad_norm": 2.07582426071167, "learning_rate": 4.8100000000000004e-05, "loss": 0.695, "step": 20 }, { "epoch": 2.0, "eval_accuracy": 0.48014440433212996, "eval_loss": 0.7236151695251465, "eval_runtime": 0.1475, "eval_samples_per_second": 1877.96, "eval_steps_per_second": 13.559, "step": 20 }, { "epoch": 3.0, "grad_norm": 0.8762505054473877, "learning_rate": 4.71e-05, "loss": 0.6831, "step": 30 }, { "epoch": 3.0, "eval_accuracy": 0.5848375451263538, "eval_loss": 0.6790599226951599, "eval_runtime": 0.1418, "eval_samples_per_second": 1953.151, "eval_steps_per_second": 14.102, "step": 30 }, { "epoch": 4.0, "grad_norm": 3.220576286315918, "learning_rate": 4.61e-05, "loss": 0.6248, "step": 40 }, { "epoch": 4.0, "eval_accuracy": 0.5884476534296029, "eval_loss": 0.6915948390960693, "eval_runtime": 0.1392, "eval_samples_per_second": 1989.957, "eval_steps_per_second": 14.368, "step": 40 }, { "epoch": 5.0, "grad_norm": 5.529593467712402, "learning_rate": 4.5100000000000005e-05, "loss": 0.5451, "step": 50 }, { "epoch": 5.0, "eval_accuracy": 0.6245487364620939, "eval_loss": 0.7247257232666016, "eval_runtime": 0.1471, "eval_samples_per_second": 1883.162, "eval_steps_per_second": 13.597, "step": 50 }, { "epoch": 6.0, "grad_norm": 4.2390875816345215, "learning_rate": 4.41e-05, "loss": 0.4932, "step": 60 }, { "epoch": 6.0, "eval_accuracy": 0.5884476534296029, "eval_loss": 0.7858375310897827, "eval_runtime": 0.1629, "eval_samples_per_second": 1700.836, "eval_steps_per_second": 12.28, "step": 60 }, { "epoch": 7.0, "grad_norm": 5.121857643127441, "learning_rate": 4.3100000000000004e-05, "loss": 0.3597, "step": 70 }, { "epoch": 7.0, "eval_accuracy": 0.6028880866425993, "eval_loss": 0.9329966306686401, "eval_runtime": 0.1713, "eval_samples_per_second": 1617.04, "eval_steps_per_second": 11.675, "step": 70 }, { "epoch": 8.0, "grad_norm": 3.846261501312256, "learning_rate": 4.21e-05, "loss": 0.268, "step": 80 }, { "epoch": 8.0, "eval_accuracy": 0.6209386281588448, "eval_loss": 0.9830819964408875, "eval_runtime": 0.1514, "eval_samples_per_second": 1829.592, "eval_steps_per_second": 13.21, "step": 80 }, { "epoch": 8.0, "step": 80, "total_flos": 1319375290613760.0, "train_loss": 0.5467315256595612, "train_runtime": 34.017, "train_samples_per_second": 3659.931, "train_steps_per_second": 14.699 } ], "logging_steps": 1, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1319375290613760.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }