unified_binary_rm_1e5_241 / trainer_state.json
Jennny's picture
upload checkpoint-241 to repo root
45fbd18 verified
{
"best_metric": 0.7681013557515837,
"best_model_checkpoint": "./my_unified_model_classification/checkpoint-241",
"epoch": 0.2993788819875776,
"eval_steps": 241,
"global_step": 241,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.062111801242236024,
"grad_norm": 63.5,
"learning_rate": 3.125e-06,
"loss": 1.0657,
"step": 50
},
{
"epoch": 0.12422360248447205,
"grad_norm": 76.0,
"learning_rate": 6.25e-06,
"loss": 0.7557,
"step": 100
},
{
"epoch": 0.18633540372670807,
"grad_norm": 78.5,
"learning_rate": 9.375000000000001e-06,
"loss": 0.6955,
"step": 150
},
{
"epoch": 0.2484472049689441,
"grad_norm": 67.5,
"learning_rate": 9.724137931034484e-06,
"loss": 0.6802,
"step": 200
},
{
"epoch": 0.2993788819875776,
"eval_accuracy": 0.6955305091333074,
"eval_auc": 0.7418841739149382,
"eval_f1": 0.7681013557515837,
"eval_false_negatives": 1514,
"eval_false_positives": 2403,
"eval_loss": 0.5827530026435852,
"eval_precision": 0.7296962879640045,
"eval_recall": 0.8107736532933383,
"eval_runtime": 455.1837,
"eval_samples_per_second": 28.263,
"eval_specificity": 0.5059621710526315,
"eval_steps_per_second": 0.222,
"eval_true_negatives": 2461,
"eval_true_positives": 6487,
"step": 241
},
{
"epoch": 0.2993788819875776,
"step": 241,
"train_accuracy": 0.687,
"train_auc": 0.7550223214285714,
"train_f1": 0.7619771863117871,
"train_false_negatives": 107,
"train_false_positives": 206,
"train_loss": 0.5734015107154846,
"train_precision": 0.7086280056577087,
"train_recall": 0.8240131578947368,
"train_runtime": 35.3914,
"train_samples_per_second": 28.255,
"train_specificity": 0.4744897959183674,
"train_steps_per_second": 0.226,
"train_true_negatives": 186,
"train_true_positives": 501
}
],
"logging_steps": 50,
"max_steps": 1610,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 241,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.3228436661794243e+18,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}