| { | |
| "best_metric": 0.5884530544281006, | |
| "best_model_checkpoint": "tiny_bert_km_100_v1_mrpc/checkpoint-60", | |
| "epoch": 9.0, | |
| "eval_steps": 500, | |
| "global_step": 135, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.8016844987869263, | |
| "learning_rate": 4.9e-05, | |
| "loss": 0.6262, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.696078431372549, | |
| "eval_combined_score": 0.7511642156862746, | |
| "eval_f1": 0.80625, | |
| "eval_loss": 0.6085503697395325, | |
| "eval_runtime": 0.2153, | |
| "eval_samples_per_second": 1894.685, | |
| "eval_steps_per_second": 9.288, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.281128168106079, | |
| "learning_rate": 4.8e-05, | |
| "loss": 0.599, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.7034313725490197, | |
| "eval_combined_score": 0.7612822034643454, | |
| "eval_f1": 0.8191330343796711, | |
| "eval_loss": 0.6000078320503235, | |
| "eval_runtime": 0.1728, | |
| "eval_samples_per_second": 2360.938, | |
| "eval_steps_per_second": 11.573, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 1.0308752059936523, | |
| "learning_rate": 4.7e-05, | |
| "loss": 0.5725, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.7058823529411765, | |
| "eval_combined_score": 0.7617557661362417, | |
| "eval_f1": 0.817629179331307, | |
| "eval_loss": 0.5969370007514954, | |
| "eval_runtime": 0.1629, | |
| "eval_samples_per_second": 2504.949, | |
| "eval_steps_per_second": 12.279, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 1.1744054555892944, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 0.5494, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.7230392156862745, | |
| "eval_combined_score": 0.7714079650999155, | |
| "eval_f1": 0.8197767145135566, | |
| "eval_loss": 0.5884530544281006, | |
| "eval_runtime": 0.1682, | |
| "eval_samples_per_second": 2426.402, | |
| "eval_steps_per_second": 11.894, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 2.2589306831359863, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.5073, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.6862745098039216, | |
| "eval_combined_score": 0.7335482138060704, | |
| "eval_f1": 0.7808219178082192, | |
| "eval_loss": 0.6107202172279358, | |
| "eval_runtime": 0.1757, | |
| "eval_samples_per_second": 2321.618, | |
| "eval_steps_per_second": 11.38, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 1.4866365194320679, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 0.4408, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.7009803921568627, | |
| "eval_combined_score": 0.7474496555378909, | |
| "eval_f1": 0.793918918918919, | |
| "eval_loss": 0.636547327041626, | |
| "eval_runtime": 0.1723, | |
| "eval_samples_per_second": 2367.506, | |
| "eval_steps_per_second": 11.605, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 2.2932753562927246, | |
| "learning_rate": 4.3e-05, | |
| "loss": 0.3565, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.7107843137254902, | |
| "eval_combined_score": 0.7560655575361458, | |
| "eval_f1": 0.8013468013468014, | |
| "eval_loss": 0.7094822525978088, | |
| "eval_runtime": 0.1637, | |
| "eval_samples_per_second": 2492.333, | |
| "eval_steps_per_second": 12.217, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 2.5407330989837646, | |
| "learning_rate": 4.2e-05, | |
| "loss": 0.2434, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.7034313725490197, | |
| "eval_combined_score": 0.7486492467515116, | |
| "eval_f1": 0.7938671209540034, | |
| "eval_loss": 0.8136605024337769, | |
| "eval_runtime": 0.1568, | |
| "eval_samples_per_second": 2602.642, | |
| "eval_steps_per_second": 12.758, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 3.441915512084961, | |
| "learning_rate": 4.1e-05, | |
| "loss": 0.1755, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.6593137254901961, | |
| "eval_combined_score": 0.7016642292276026, | |
| "eval_f1": 0.7440147329650092, | |
| "eval_loss": 0.9716194272041321, | |
| "eval_runtime": 0.1674, | |
| "eval_samples_per_second": 2437.535, | |
| "eval_steps_per_second": 11.949, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "step": 135, | |
| "total_flos": 865692760633344.0, | |
| "train_loss": 0.4522992098772967, | |
| "train_runtime": 30.6833, | |
| "train_samples_per_second": 5977.188, | |
| "train_steps_per_second": 24.443 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 750, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 500, | |
| "total_flos": 865692760633344.0, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |