gpts-detector / trainer_state.json
ductuan024's picture
Upload 11 files
3036f16 verified
{
"best_metric": 0.8266331658291457,
"best_model_checkpoint": "results/deberta-v3-large/checkpoint-350",
"epoch": 14.0,
"eval_steps": 500,
"global_step": 350,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 1.357721209526062,
"learning_rate": 9.9e-06,
"loss": 0.6886,
"step": 25
},
{
"epoch": 1.0,
"eval_accuracy": 0.6490566037735849,
"eval_f1": 0.6193724420190996,
"eval_loss": 0.6591259241104126,
"eval_precision": 0.6715976331360947,
"eval_recall": 0.5746835443037974,
"eval_runtime": 7.9916,
"eval_samples_per_second": 99.479,
"eval_steps_per_second": 0.876,
"step": 25
},
{
"epoch": 2.0,
"grad_norm": 5.484292984008789,
"learning_rate": 9.800000000000001e-06,
"loss": 0.6144,
"step": 50
},
{
"epoch": 2.0,
"eval_accuracy": 0.7081761006289308,
"eval_f1": 0.7121588089330024,
"eval_loss": 0.5845022797584534,
"eval_precision": 0.6982968369829684,
"eval_recall": 0.7265822784810126,
"eval_runtime": 7.2078,
"eval_samples_per_second": 110.298,
"eval_steps_per_second": 0.971,
"step": 50
},
{
"epoch": 3.0,
"grad_norm": 5.340143203735352,
"learning_rate": 9.7e-06,
"loss": 0.5033,
"step": 75
},
{
"epoch": 3.0,
"eval_accuracy": 0.7584905660377359,
"eval_f1": 0.7453580901856764,
"eval_loss": 0.5280972719192505,
"eval_precision": 0.7827298050139275,
"eval_recall": 0.7113924050632912,
"eval_runtime": 7.306,
"eval_samples_per_second": 108.814,
"eval_steps_per_second": 0.958,
"step": 75
},
{
"epoch": 4.0,
"grad_norm": 11.039319038391113,
"learning_rate": 9.600000000000001e-06,
"loss": 0.3689,
"step": 100
},
{
"epoch": 4.0,
"eval_accuracy": 0.7773584905660378,
"eval_f1": 0.7756653992395437,
"eval_loss": 0.5035372972488403,
"eval_precision": 0.7766497461928934,
"eval_recall": 0.7746835443037975,
"eval_runtime": 7.6101,
"eval_samples_per_second": 104.467,
"eval_steps_per_second": 0.92,
"step": 100
},
{
"epoch": 5.0,
"grad_norm": 10.600135803222656,
"learning_rate": 9.5e-06,
"loss": 0.2588,
"step": 125
},
{
"epoch": 5.0,
"eval_accuracy": 0.7773584905660378,
"eval_f1": 0.7385524372230429,
"eval_loss": 0.5942177772521973,
"eval_precision": 0.8865248226950354,
"eval_recall": 0.6329113924050633,
"eval_runtime": 7.274,
"eval_samples_per_second": 109.293,
"eval_steps_per_second": 0.962,
"step": 125
},
{
"epoch": 6.0,
"grad_norm": 29.822813034057617,
"learning_rate": 9.4e-06,
"loss": 0.2093,
"step": 150
},
{
"epoch": 6.0,
"eval_accuracy": 0.7849056603773585,
"eval_f1": 0.7466666666666667,
"eval_loss": 0.6665811538696289,
"eval_precision": 0.9,
"eval_recall": 0.6379746835443038,
"eval_runtime": 7.3649,
"eval_samples_per_second": 107.944,
"eval_steps_per_second": 0.95,
"step": 150
},
{
"epoch": 7.0,
"grad_norm": 8.441102981567383,
"learning_rate": 9.3e-06,
"loss": 0.1578,
"step": 175
},
{
"epoch": 7.0,
"eval_accuracy": 0.8125786163522013,
"eval_f1": 0.8082368082368082,
"eval_loss": 0.5872107744216919,
"eval_precision": 0.8219895287958116,
"eval_recall": 0.7949367088607595,
"eval_runtime": 7.4254,
"eval_samples_per_second": 107.065,
"eval_steps_per_second": 0.943,
"step": 175
},
{
"epoch": 8.0,
"grad_norm": 8.99835205078125,
"learning_rate": 9.200000000000002e-06,
"loss": 0.147,
"step": 200
},
{
"epoch": 8.0,
"eval_accuracy": 0.769811320754717,
"eval_f1": 0.7197549770290965,
"eval_loss": 0.7519409656524658,
"eval_precision": 0.9108527131782945,
"eval_recall": 0.5949367088607594,
"eval_runtime": 7.7545,
"eval_samples_per_second": 102.521,
"eval_steps_per_second": 0.903,
"step": 200
},
{
"epoch": 9.0,
"grad_norm": 1.7589746713638306,
"learning_rate": 9.100000000000001e-06,
"loss": 0.1021,
"step": 225
},
{
"epoch": 9.0,
"eval_accuracy": 0.810062893081761,
"eval_f1": 0.7945578231292517,
"eval_loss": 0.6721034646034241,
"eval_precision": 0.8588235294117647,
"eval_recall": 0.739240506329114,
"eval_runtime": 7.6115,
"eval_samples_per_second": 104.448,
"eval_steps_per_second": 0.92,
"step": 225
},
{
"epoch": 10.0,
"grad_norm": 5.351127624511719,
"learning_rate": 9e-06,
"loss": 0.088,
"step": 250
},
{
"epoch": 10.0,
"eval_accuracy": 0.8176100628930818,
"eval_f1": 0.8119325551232166,
"eval_loss": 0.6214590668678284,
"eval_precision": 0.8324468085106383,
"eval_recall": 0.7924050632911392,
"eval_runtime": 7.3972,
"eval_samples_per_second": 107.472,
"eval_steps_per_second": 0.946,
"step": 250
},
{
"epoch": 11.0,
"grad_norm": 3.241373300552368,
"learning_rate": 8.900000000000001e-06,
"loss": 0.0751,
"step": 275
},
{
"epoch": 11.0,
"eval_accuracy": 0.8125786163522013,
"eval_f1": 0.8171779141104294,
"eval_loss": 0.7023480534553528,
"eval_precision": 0.7928571428571428,
"eval_recall": 0.8430379746835444,
"eval_runtime": 7.2686,
"eval_samples_per_second": 109.375,
"eval_steps_per_second": 0.963,
"step": 275
},
{
"epoch": 12.0,
"grad_norm": 34.19482421875,
"learning_rate": 8.8e-06,
"loss": 0.0611,
"step": 300
},
{
"epoch": 12.0,
"eval_accuracy": 0.8,
"eval_f1": 0.774468085106383,
"eval_loss": 0.9275862574577332,
"eval_precision": 0.8806451612903226,
"eval_recall": 0.6911392405063291,
"eval_runtime": 7.2012,
"eval_samples_per_second": 110.399,
"eval_steps_per_second": 0.972,
"step": 300
},
{
"epoch": 13.0,
"grad_norm": 16.30063247680664,
"learning_rate": 8.700000000000001e-06,
"loss": 0.0568,
"step": 325
},
{
"epoch": 13.0,
"eval_accuracy": 0.8113207547169812,
"eval_f1": 0.8026315789473685,
"eval_loss": 0.7658352255821228,
"eval_precision": 0.8356164383561644,
"eval_recall": 0.7721518987341772,
"eval_runtime": 7.2249,
"eval_samples_per_second": 110.036,
"eval_steps_per_second": 0.969,
"step": 325
},
{
"epoch": 14.0,
"grad_norm": 0.775602400302887,
"learning_rate": 8.6e-06,
"loss": 0.0531,
"step": 350
},
{
"epoch": 14.0,
"eval_accuracy": 0.8264150943396227,
"eval_f1": 0.8266331658291457,
"eval_loss": 0.7808425426483154,
"eval_precision": 0.8204488778054863,
"eval_recall": 0.8329113924050633,
"eval_runtime": 7.7991,
"eval_samples_per_second": 101.935,
"eval_steps_per_second": 0.898,
"step": 350
}
],
"logging_steps": 500,
"max_steps": 2500,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.138632372487107e+16,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}