| { | |
| "best_global_step": 30, | |
| "best_metric": 0.6790599226951599, | |
| "best_model_checkpoint": "tinybert_base_train_kd_rte/checkpoint-30", | |
| "epoch": 8.0, | |
| "eval_steps": 500, | |
| "global_step": 80, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.5859740972518921, | |
| "learning_rate": 4.91e-05, | |
| "loss": 0.705, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.5270758122743683, | |
| "eval_loss": 0.691801130771637, | |
| "eval_runtime": 0.1452, | |
| "eval_samples_per_second": 1907.112, | |
| "eval_steps_per_second": 13.77, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.07582426071167, | |
| "learning_rate": 4.8100000000000004e-05, | |
| "loss": 0.695, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.48014440433212996, | |
| "eval_loss": 0.7236151695251465, | |
| "eval_runtime": 0.1475, | |
| "eval_samples_per_second": 1877.96, | |
| "eval_steps_per_second": 13.559, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.8762505054473877, | |
| "learning_rate": 4.71e-05, | |
| "loss": 0.6831, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.5848375451263538, | |
| "eval_loss": 0.6790599226951599, | |
| "eval_runtime": 0.1418, | |
| "eval_samples_per_second": 1953.151, | |
| "eval_steps_per_second": 14.102, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 3.220576286315918, | |
| "learning_rate": 4.61e-05, | |
| "loss": 0.6248, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.5884476534296029, | |
| "eval_loss": 0.6915948390960693, | |
| "eval_runtime": 0.1392, | |
| "eval_samples_per_second": 1989.957, | |
| "eval_steps_per_second": 14.368, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 5.529593467712402, | |
| "learning_rate": 4.5100000000000005e-05, | |
| "loss": 0.5451, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.6245487364620939, | |
| "eval_loss": 0.7247257232666016, | |
| "eval_runtime": 0.1471, | |
| "eval_samples_per_second": 1883.162, | |
| "eval_steps_per_second": 13.597, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 4.2390875816345215, | |
| "learning_rate": 4.41e-05, | |
| "loss": 0.4932, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.5884476534296029, | |
| "eval_loss": 0.7858375310897827, | |
| "eval_runtime": 0.1629, | |
| "eval_samples_per_second": 1700.836, | |
| "eval_steps_per_second": 12.28, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 5.121857643127441, | |
| "learning_rate": 4.3100000000000004e-05, | |
| "loss": 0.3597, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.6028880866425993, | |
| "eval_loss": 0.9329966306686401, | |
| "eval_runtime": 0.1713, | |
| "eval_samples_per_second": 1617.04, | |
| "eval_steps_per_second": 11.675, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 3.846261501312256, | |
| "learning_rate": 4.21e-05, | |
| "loss": 0.268, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.6209386281588448, | |
| "eval_loss": 0.9830819964408875, | |
| "eval_runtime": 0.1514, | |
| "eval_samples_per_second": 1829.592, | |
| "eval_steps_per_second": 13.21, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "step": 80, | |
| "total_flos": 1319375290613760.0, | |
| "train_loss": 0.5467315256595612, | |
| "train_runtime": 34.017, | |
| "train_samples_per_second": 3659.931, | |
| "train_steps_per_second": 14.699 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 5 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1319375290613760.0, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |