{ "best_metric": 0.5884530544281006, "best_model_checkpoint": "tiny_bert_km_100_v1_mrpc/checkpoint-60", "epoch": 9.0, "eval_steps": 500, "global_step": 135, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.8016844987869263, "learning_rate": 4.9e-05, "loss": 0.6262, "step": 15 }, { "epoch": 1.0, "eval_accuracy": 0.696078431372549, "eval_combined_score": 0.7511642156862746, "eval_f1": 0.80625, "eval_loss": 0.6085503697395325, "eval_runtime": 0.2153, "eval_samples_per_second": 1894.685, "eval_steps_per_second": 9.288, "step": 15 }, { "epoch": 2.0, "grad_norm": 1.281128168106079, "learning_rate": 4.8e-05, "loss": 0.599, "step": 30 }, { "epoch": 2.0, "eval_accuracy": 0.7034313725490197, "eval_combined_score": 0.7612822034643454, "eval_f1": 0.8191330343796711, "eval_loss": 0.6000078320503235, "eval_runtime": 0.1728, "eval_samples_per_second": 2360.938, "eval_steps_per_second": 11.573, "step": 30 }, { "epoch": 3.0, "grad_norm": 1.0308752059936523, "learning_rate": 4.7e-05, "loss": 0.5725, "step": 45 }, { "epoch": 3.0, "eval_accuracy": 0.7058823529411765, "eval_combined_score": 0.7617557661362417, "eval_f1": 0.817629179331307, "eval_loss": 0.5969370007514954, "eval_runtime": 0.1629, "eval_samples_per_second": 2504.949, "eval_steps_per_second": 12.279, "step": 45 }, { "epoch": 4.0, "grad_norm": 1.1744054555892944, "learning_rate": 4.600000000000001e-05, "loss": 0.5494, "step": 60 }, { "epoch": 4.0, "eval_accuracy": 0.7230392156862745, "eval_combined_score": 0.7714079650999155, "eval_f1": 0.8197767145135566, "eval_loss": 0.5884530544281006, "eval_runtime": 0.1682, "eval_samples_per_second": 2426.402, "eval_steps_per_second": 11.894, "step": 60 }, { "epoch": 5.0, "grad_norm": 2.2589306831359863, "learning_rate": 4.5e-05, "loss": 0.5073, "step": 75 }, { "epoch": 5.0, "eval_accuracy": 0.6862745098039216, "eval_combined_score": 0.7335482138060704, "eval_f1": 0.7808219178082192, "eval_loss": 0.6107202172279358, "eval_runtime": 0.1757, "eval_samples_per_second": 2321.618, "eval_steps_per_second": 11.38, "step": 75 }, { "epoch": 6.0, "grad_norm": 1.4866365194320679, "learning_rate": 4.4000000000000006e-05, "loss": 0.4408, "step": 90 }, { "epoch": 6.0, "eval_accuracy": 0.7009803921568627, "eval_combined_score": 0.7474496555378909, "eval_f1": 0.793918918918919, "eval_loss": 0.636547327041626, "eval_runtime": 0.1723, "eval_samples_per_second": 2367.506, "eval_steps_per_second": 11.605, "step": 90 }, { "epoch": 7.0, "grad_norm": 2.2932753562927246, "learning_rate": 4.3e-05, "loss": 0.3565, "step": 105 }, { "epoch": 7.0, "eval_accuracy": 0.7107843137254902, "eval_combined_score": 0.7560655575361458, "eval_f1": 0.8013468013468014, "eval_loss": 0.7094822525978088, "eval_runtime": 0.1637, "eval_samples_per_second": 2492.333, "eval_steps_per_second": 12.217, "step": 105 }, { "epoch": 8.0, "grad_norm": 2.5407330989837646, "learning_rate": 4.2e-05, "loss": 0.2434, "step": 120 }, { "epoch": 8.0, "eval_accuracy": 0.7034313725490197, "eval_combined_score": 0.7486492467515116, "eval_f1": 0.7938671209540034, "eval_loss": 0.8136605024337769, "eval_runtime": 0.1568, "eval_samples_per_second": 2602.642, "eval_steps_per_second": 12.758, "step": 120 }, { "epoch": 9.0, "grad_norm": 3.441915512084961, "learning_rate": 4.1e-05, "loss": 0.1755, "step": 135 }, { "epoch": 9.0, "eval_accuracy": 0.6593137254901961, "eval_combined_score": 0.7016642292276026, "eval_f1": 0.7440147329650092, "eval_loss": 0.9716194272041321, "eval_runtime": 0.1674, "eval_samples_per_second": 2437.535, "eval_steps_per_second": 11.949, "step": 135 }, { "epoch": 9.0, "step": 135, "total_flos": 865692760633344.0, "train_loss": 0.4522992098772967, "train_runtime": 30.6833, "train_samples_per_second": 5977.188, "train_steps_per_second": 24.443 } ], "logging_steps": 1, "max_steps": 750, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 865692760633344.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }