SW2-DMAE / trainer_state.json
Augusto777's picture
End of training
71dab78 verified
{
"best_metric": 0.45652173913043476,
"best_model_checkpoint": "SW2-DMAE\\checkpoint-49",
"epoch": 34.285714285714285,
"eval_steps": 500,
"global_step": 120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.86,
"eval_accuracy": 0.10869565217391304,
"eval_loss": 7.9393720626831055,
"eval_runtime": 0.2251,
"eval_samples_per_second": 204.399,
"eval_steps_per_second": 13.33,
"step": 3
},
{
"epoch": 2.0,
"eval_accuracy": 0.10869565217391304,
"eval_loss": 7.897927761077881,
"eval_runtime": 0.2391,
"eval_samples_per_second": 192.425,
"eval_steps_per_second": 12.549,
"step": 7
},
{
"epoch": 2.86,
"learning_rate": 4.166666666666667e-05,
"loss": 7.935,
"step": 10
},
{
"epoch": 2.86,
"eval_accuracy": 0.10869565217391304,
"eval_loss": 7.767155647277832,
"eval_runtime": 0.2321,
"eval_samples_per_second": 198.231,
"eval_steps_per_second": 12.928,
"step": 10
},
{
"epoch": 4.0,
"eval_accuracy": 0.10869565217391304,
"eval_loss": 7.219719886779785,
"eval_runtime": 0.2351,
"eval_samples_per_second": 195.7,
"eval_steps_per_second": 12.763,
"step": 14
},
{
"epoch": 4.86,
"eval_accuracy": 0.10869565217391304,
"eval_loss": 6.566103935241699,
"eval_runtime": 0.2292,
"eval_samples_per_second": 200.702,
"eval_steps_per_second": 13.089,
"step": 17
},
{
"epoch": 5.71,
"learning_rate": 4.62962962962963e-05,
"loss": 7.0143,
"step": 20
},
{
"epoch": 6.0,
"eval_accuracy": 0.10869565217391304,
"eval_loss": 5.730363368988037,
"eval_runtime": 0.2911,
"eval_samples_per_second": 158.04,
"eval_steps_per_second": 10.307,
"step": 21
},
{
"epoch": 6.86,
"eval_accuracy": 0.10869565217391304,
"eval_loss": 5.118360996246338,
"eval_runtime": 0.2291,
"eval_samples_per_second": 200.828,
"eval_steps_per_second": 13.097,
"step": 24
},
{
"epoch": 8.0,
"eval_accuracy": 0.10869565217391304,
"eval_loss": 4.352573394775391,
"eval_runtime": 0.2536,
"eval_samples_per_second": 181.411,
"eval_steps_per_second": 11.831,
"step": 28
},
{
"epoch": 8.57,
"learning_rate": 4.166666666666667e-05,
"loss": 4.9972,
"step": 30
},
{
"epoch": 8.86,
"eval_accuracy": 0.10869565217391304,
"eval_loss": 3.8116581439971924,
"eval_runtime": 0.2421,
"eval_samples_per_second": 190.04,
"eval_steps_per_second": 12.394,
"step": 31
},
{
"epoch": 10.0,
"eval_accuracy": 0.10869565217391304,
"eval_loss": 3.151806354522705,
"eval_runtime": 0.2396,
"eval_samples_per_second": 192.018,
"eval_steps_per_second": 12.523,
"step": 35
},
{
"epoch": 10.86,
"eval_accuracy": 0.10869565217391304,
"eval_loss": 2.7124626636505127,
"eval_runtime": 0.2381,
"eval_samples_per_second": 193.229,
"eval_steps_per_second": 12.602,
"step": 38
},
{
"epoch": 11.43,
"learning_rate": 3.7037037037037037e-05,
"loss": 3.3803,
"step": 40
},
{
"epoch": 12.0,
"eval_accuracy": 0.10869565217391304,
"eval_loss": 2.2254273891448975,
"eval_runtime": 0.2416,
"eval_samples_per_second": 190.423,
"eval_steps_per_second": 12.419,
"step": 42
},
{
"epoch": 12.86,
"eval_accuracy": 0.10869565217391304,
"eval_loss": 1.9449864625930786,
"eval_runtime": 0.2426,
"eval_samples_per_second": 189.639,
"eval_steps_per_second": 12.368,
"step": 45
},
{
"epoch": 14.0,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.6738574504852295,
"eval_runtime": 0.2396,
"eval_samples_per_second": 192.014,
"eval_steps_per_second": 12.523,
"step": 49
},
{
"epoch": 14.29,
"learning_rate": 3.240740740740741e-05,
"loss": 2.0759,
"step": 50
},
{
"epoch": 14.86,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.5299274921417236,
"eval_runtime": 0.2221,
"eval_samples_per_second": 207.16,
"eval_steps_per_second": 13.51,
"step": 52
},
{
"epoch": 16.0,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.3876434564590454,
"eval_runtime": 0.2486,
"eval_samples_per_second": 185.061,
"eval_steps_per_second": 12.069,
"step": 56
},
{
"epoch": 16.86,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.305933952331543,
"eval_runtime": 0.2481,
"eval_samples_per_second": 185.442,
"eval_steps_per_second": 12.094,
"step": 59
},
{
"epoch": 17.14,
"learning_rate": 2.777777777777778e-05,
"loss": 1.4466,
"step": 60
},
{
"epoch": 18.0,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.2341125011444092,
"eval_runtime": 0.2951,
"eval_samples_per_second": 155.897,
"eval_steps_per_second": 10.167,
"step": 63
},
{
"epoch": 18.86,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.2120041847229004,
"eval_runtime": 0.2371,
"eval_samples_per_second": 194.049,
"eval_steps_per_second": 12.655,
"step": 66
},
{
"epoch": 20.0,
"learning_rate": 2.314814814814815e-05,
"loss": 1.2349,
"step": 70
},
{
"epoch": 20.0,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.2095657587051392,
"eval_runtime": 0.2456,
"eval_samples_per_second": 187.325,
"eval_steps_per_second": 12.217,
"step": 70
},
{
"epoch": 20.86,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.2118462324142456,
"eval_runtime": 0.2391,
"eval_samples_per_second": 192.425,
"eval_steps_per_second": 12.549,
"step": 73
},
{
"epoch": 22.0,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.2113664150238037,
"eval_runtime": 0.2471,
"eval_samples_per_second": 186.183,
"eval_steps_per_second": 12.142,
"step": 77
},
{
"epoch": 22.86,
"learning_rate": 1.8518518518518518e-05,
"loss": 1.1854,
"step": 80
},
{
"epoch": 22.86,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.2140562534332275,
"eval_runtime": 0.2411,
"eval_samples_per_second": 190.828,
"eval_steps_per_second": 12.445,
"step": 80
},
{
"epoch": 24.0,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.211725115776062,
"eval_runtime": 0.2571,
"eval_samples_per_second": 178.948,
"eval_steps_per_second": 11.671,
"step": 84
},
{
"epoch": 24.86,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.2102160453796387,
"eval_runtime": 0.2521,
"eval_samples_per_second": 182.498,
"eval_steps_per_second": 11.902,
"step": 87
},
{
"epoch": 25.71,
"learning_rate": 1.388888888888889e-05,
"loss": 1.1878,
"step": 90
},
{
"epoch": 26.0,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.2075891494750977,
"eval_runtime": 0.2521,
"eval_samples_per_second": 182.498,
"eval_steps_per_second": 11.902,
"step": 91
},
{
"epoch": 26.86,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.2082799673080444,
"eval_runtime": 0.2431,
"eval_samples_per_second": 189.258,
"eval_steps_per_second": 12.343,
"step": 94
},
{
"epoch": 28.0,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.2129710912704468,
"eval_runtime": 0.2271,
"eval_samples_per_second": 202.585,
"eval_steps_per_second": 13.212,
"step": 98
},
{
"epoch": 28.57,
"learning_rate": 9.259259259259259e-06,
"loss": 1.1986,
"step": 100
},
{
"epoch": 28.86,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.2068954706192017,
"eval_runtime": 0.2281,
"eval_samples_per_second": 201.708,
"eval_steps_per_second": 13.155,
"step": 101
},
{
"epoch": 30.0,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.2058416604995728,
"eval_runtime": 0.2551,
"eval_samples_per_second": 180.351,
"eval_steps_per_second": 11.762,
"step": 105
},
{
"epoch": 30.86,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.2070415019989014,
"eval_runtime": 0.2396,
"eval_samples_per_second": 192.016,
"eval_steps_per_second": 12.523,
"step": 108
},
{
"epoch": 31.43,
"learning_rate": 4.6296296296296296e-06,
"loss": 1.182,
"step": 110
},
{
"epoch": 32.0,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.2074666023254395,
"eval_runtime": 0.2631,
"eval_samples_per_second": 174.865,
"eval_steps_per_second": 11.404,
"step": 112
},
{
"epoch": 32.86,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.2073932886123657,
"eval_runtime": 0.2401,
"eval_samples_per_second": 191.624,
"eval_steps_per_second": 12.497,
"step": 115
},
{
"epoch": 34.0,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.207228422164917,
"eval_runtime": 0.2461,
"eval_samples_per_second": 186.949,
"eval_steps_per_second": 12.192,
"step": 119
},
{
"epoch": 34.29,
"learning_rate": 0.0,
"loss": 1.2064,
"step": 120
},
{
"epoch": 34.29,
"eval_accuracy": 0.45652173913043476,
"eval_loss": 1.207200050354004,
"eval_runtime": 0.2516,
"eval_samples_per_second": 182.854,
"eval_steps_per_second": 11.925,
"step": 120
},
{
"epoch": 34.29,
"step": 120,
"total_flos": 2.3770905934823424e+17,
"train_loss": 2.837039653460185,
"train_runtime": 179.4861,
"train_samples_per_second": 47.469,
"train_steps_per_second": 0.669
}
],
"logging_steps": 10,
"max_steps": 120,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 500,
"total_flos": 2.3770905934823424e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}