| { | |
| "best_metric": 0.7978339350180506, | |
| "best_model_checkpoint": "./fine-tune/roberta-base/rte/checkpoint-780", | |
| "epoch": 10.0, | |
| "global_step": 1560, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.4729241877256318, | |
| "eval_loss": 0.7023295164108276, | |
| "eval_runtime": 0.5782, | |
| "eval_samples_per_second": 479.05, | |
| "eval_steps_per_second": 60.53, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.6895306859205776, | |
| "eval_loss": 0.6356249451637268, | |
| "eval_runtime": 0.6365, | |
| "eval_samples_per_second": 435.208, | |
| "eval_steps_per_second": 54.99, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.7617328519855595, | |
| "eval_loss": 0.5177018046379089, | |
| "eval_runtime": 0.6111, | |
| "eval_samples_per_second": 453.318, | |
| "eval_steps_per_second": 57.278, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 7.23055934515689e-06, | |
| "loss": 0.6131, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.7472924187725631, | |
| "eval_loss": 0.6237531304359436, | |
| "eval_runtime": 0.6069, | |
| "eval_samples_per_second": 456.426, | |
| "eval_steps_per_second": 57.671, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.7978339350180506, | |
| "eval_loss": 0.5446364283561707, | |
| "eval_runtime": 0.6019, | |
| "eval_samples_per_second": 460.195, | |
| "eval_steps_per_second": 58.147, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.7545126353790613, | |
| "eval_loss": 0.9697452783584595, | |
| "eval_runtime": 0.6225, | |
| "eval_samples_per_second": 444.97, | |
| "eval_steps_per_second": 56.224, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "learning_rate": 3.819918144611187e-06, | |
| "loss": 0.2528, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.7689530685920578, | |
| "eval_loss": 1.100365161895752, | |
| "eval_runtime": 0.6174, | |
| "eval_samples_per_second": 448.691, | |
| "eval_steps_per_second": 56.694, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.7725631768953068, | |
| "eval_loss": 1.193679928779602, | |
| "eval_runtime": 0.6461, | |
| "eval_samples_per_second": 428.72, | |
| "eval_steps_per_second": 54.17, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.7725631768953068, | |
| "eval_loss": 1.331286072731018, | |
| "eval_runtime": 0.5642, | |
| "eval_samples_per_second": 490.964, | |
| "eval_steps_per_second": 62.035, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 9.62, | |
| "learning_rate": 4.092769440654844e-07, | |
| "loss": 0.1073, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.7725631768953068, | |
| "eval_loss": 1.353358268737793, | |
| "eval_runtime": 0.6152, | |
| "eval_samples_per_second": 450.281, | |
| "eval_steps_per_second": 56.895, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 1560, | |
| "total_flos": 1637866319616000.0, | |
| "train_loss": 0.31504388589125415, | |
| "train_runtime": 295.0697, | |
| "train_samples_per_second": 84.387, | |
| "train_steps_per_second": 5.287 | |
| } | |
| ], | |
| "max_steps": 1560, | |
| "num_train_epochs": 10, | |
| "total_flos": 1637866319616000.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |