youralien's picture
Upload trained RoBERTa model
58dd4e0 verified
{
"best_metric": 0.5070422535211268,
"best_model_checkpoint": "roberta-Suggestions-goodareas-eval_FeedbackESConv5pp_CARE10pp-sweeps-current/checkpoint-423",
"epoch": 9.0,
"eval_steps": 500,
"global_step": 423,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 2.497709274291992,
"learning_rate": 9.437100067448849e-06,
"loss": 0.1893,
"step": 47
},
{
"epoch": 1.0,
"eval_accuracy": 0.9589216944801027,
"eval_f1": 0.0,
"eval_loss": 0.026493968442082405,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 5.6135,
"eval_samples_per_second": 138.772,
"eval_steps_per_second": 8.729,
"step": 47
},
{
"epoch": 2.0,
"grad_norm": 2.9029781818389893,
"learning_rate": 8.388533393287865e-06,
"loss": 0.1602,
"step": 94
},
{
"epoch": 2.0,
"eval_accuracy": 0.9589216944801027,
"eval_f1": 0.0,
"eval_loss": 0.07899215817451477,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 6.0685,
"eval_samples_per_second": 128.367,
"eval_steps_per_second": 8.074,
"step": 94
},
{
"epoch": 3.0,
"grad_norm": 1.2197129726409912,
"learning_rate": 7.339966719126881e-06,
"loss": 0.1275,
"step": 141
},
{
"epoch": 3.0,
"eval_accuracy": 0.9589216944801027,
"eval_f1": 0.0,
"eval_loss": 0.02892177551984787,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 5.6576,
"eval_samples_per_second": 137.69,
"eval_steps_per_second": 8.661,
"step": 141
},
{
"epoch": 4.0,
"grad_norm": 5.551578044891357,
"learning_rate": 6.291400044965899e-06,
"loss": 0.1118,
"step": 188
},
{
"epoch": 4.0,
"eval_accuracy": 0.9614890885750963,
"eval_f1": 0.11764705882352941,
"eval_loss": 0.04150189831852913,
"eval_precision": 1.0,
"eval_recall": 0.0625,
"eval_runtime": 5.84,
"eval_samples_per_second": 133.391,
"eval_steps_per_second": 8.39,
"step": 188
},
{
"epoch": 5.0,
"grad_norm": 2.566047430038452,
"learning_rate": 5.242833370804916e-06,
"loss": 0.0969,
"step": 235
},
{
"epoch": 5.0,
"eval_accuracy": 0.9602053915275995,
"eval_f1": 0.06060606060606061,
"eval_loss": 0.02676660381257534,
"eval_precision": 1.0,
"eval_recall": 0.03125,
"eval_runtime": 5.7945,
"eval_samples_per_second": 134.439,
"eval_steps_per_second": 8.456,
"step": 235
},
{
"epoch": 6.0,
"grad_norm": 11.057209968566895,
"learning_rate": 4.194266696643932e-06,
"loss": 0.0889,
"step": 282
},
{
"epoch": 6.0,
"eval_accuracy": 0.9563543003851092,
"eval_f1": 0.2608695652173913,
"eval_loss": 0.02810371294617653,
"eval_precision": 0.42857142857142855,
"eval_recall": 0.1875,
"eval_runtime": 5.5694,
"eval_samples_per_second": 139.872,
"eval_steps_per_second": 8.798,
"step": 282
},
{
"epoch": 7.0,
"grad_norm": 21.280832290649414,
"learning_rate": 3.1457000224829495e-06,
"loss": 0.1033,
"step": 329
},
{
"epoch": 7.0,
"eval_accuracy": 0.9589216944801027,
"eval_f1": 0.23809523809523808,
"eval_loss": 0.023050550371408463,
"eval_precision": 0.5,
"eval_recall": 0.15625,
"eval_runtime": 5.7361,
"eval_samples_per_second": 135.806,
"eval_steps_per_second": 8.542,
"step": 329
},
{
"epoch": 8.0,
"grad_norm": 12.596846580505371,
"learning_rate": 2.097133348321966e-06,
"loss": 0.0846,
"step": 376
},
{
"epoch": 8.0,
"eval_accuracy": 0.9563543003851092,
"eval_f1": 0.48484848484848486,
"eval_loss": 0.06229134276509285,
"eval_precision": 0.47058823529411764,
"eval_recall": 0.5,
"eval_runtime": 5.9283,
"eval_samples_per_second": 131.404,
"eval_steps_per_second": 8.265,
"step": 376
},
{
"epoch": 9.0,
"grad_norm": 10.026678085327148,
"learning_rate": 1.048566674160983e-06,
"loss": 0.0636,
"step": 423
},
{
"epoch": 9.0,
"eval_accuracy": 0.9550706033376123,
"eval_f1": 0.5070422535211268,
"eval_loss": 0.07518932968378067,
"eval_precision": 0.46153846153846156,
"eval_recall": 0.5625,
"eval_runtime": 5.4962,
"eval_samples_per_second": 141.734,
"eval_steps_per_second": 8.915,
"step": 423
}
],
"logging_steps": 100,
"max_steps": 470,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4003551654358872.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}