{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 10146,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14784151389710232,
"grad_norm": 3.7569432258605957,
"learning_rate": 1.9016361127537947e-05,
"loss": 0.156098388671875,
"step": 500
},
{
"epoch": 0.29568302779420463,
"grad_norm": 0.009413833729922771,
"learning_rate": 1.8030751034890598e-05,
"loss": 0.09492064666748047,
"step": 1000
},
{
"epoch": 0.4435245416913069,
"grad_norm": 0.07881546020507812,
"learning_rate": 1.704514094224325e-05,
"loss": 0.07839873504638672,
"step": 1500
},
{
"epoch": 0.5913660555884093,
"grad_norm": 0.015427447855472565,
"learning_rate": 1.6059530849595903e-05,
"loss": 0.07814859008789063,
"step": 2000
},
{
"epoch": 0.7392075694855116,
"grad_norm": 14.57061767578125,
"learning_rate": 1.5073920756948552e-05,
"loss": 0.08136223602294922,
"step": 2500
},
{
"epoch": 0.8870490833826138,
"grad_norm": 1.0001031160354614,
"learning_rate": 1.4088310664301204e-05,
"loss": 0.07243869018554687,
"step": 3000
},
{
"epoch": 1.0348905972797162,
"grad_norm": 30.364643096923828,
"learning_rate": 1.3102700571653855e-05,
"loss": 0.04469930648803711,
"step": 3500
},
{
"epoch": 1.1827321111768185,
"grad_norm": 0.0182269848883152,
"learning_rate": 1.2117090479006506e-05,
"loss": 0.03440779495239258,
"step": 4000
},
{
"epoch": 1.3305736250739209,
"grad_norm": 0.0005539056146517396,
"learning_rate": 1.1131480386359156e-05,
"loss": 0.0230482120513916,
"step": 4500
},
{
"epoch": 1.4784151389710232,
"grad_norm": 0.739719033241272,
"learning_rate": 1.0145870293711809e-05,
"loss": 0.028042703628540038,
"step": 5000
},
{
"epoch": 1.6262566528681255,
"grad_norm": 0.0063209934160113335,
"learning_rate": 9.16026020106446e-06,
"loss": 0.02548642349243164,
"step": 5500
},
{
"epoch": 1.7740981667652278,
"grad_norm": 0.0015545282512903214,
"learning_rate": 8.17465010841711e-06,
"loss": 0.023311178207397462,
"step": 6000
},
{
"epoch": 1.9219396806623301,
"grad_norm": 0.0024629898834973574,
"learning_rate": 7.189040015769762e-06,
"loss": 0.019577335357666016,
"step": 6500
},
{
"epoch": 2.0697811945594324,
"grad_norm": 0.0016239744145423174,
"learning_rate": 6.203429923122414e-06,
"loss": 0.016429786682128907,
"step": 7000
},
{
"epoch": 2.2176227084565348,
"grad_norm": 33.945587158203125,
"learning_rate": 5.217819830475065e-06,
"loss": 0.00910054111480713,
"step": 7500
},
{
"epoch": 2.365464222353637,
"grad_norm": 0.0004062611551489681,
"learning_rate": 4.2322097378277155e-06,
"loss": 0.008913342475891112,
"step": 8000
},
{
"epoch": 2.5133057362507394,
"grad_norm": 0.00030476730898953974,
"learning_rate": 3.246599645180367e-06,
"loss": 0.006344354152679444,
"step": 8500
},
{
"epoch": 2.6611472501478417,
"grad_norm": 0.0006314264028333127,
"learning_rate": 2.260989552533018e-06,
"loss": 0.0064224090576171875,
"step": 9000
},
{
"epoch": 2.808988764044944,
"grad_norm": 0.0005876660579815507,
"learning_rate": 1.2753794598856695e-06,
"loss": 0.007262358665466309,
"step": 9500
},
{
"epoch": 2.9568302779420463,
"grad_norm": 0.0005308115505613387,
"learning_rate": 2.897693672383205e-07,
"loss": 0.004996685981750488,
"step": 10000
},
{
"epoch": 3.0,
"step": 10146,
"total_flos": 9.454816453507891e+16,
"train_loss": 0.040564874114779687,
"train_runtime": 20432.9457,
"train_samples_per_second": 4.965,
"train_steps_per_second": 0.497
}
],
"logging_steps": 500,
"max_steps": 10146,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.454816453507891e+16,
"train_batch_size": 10,
"trial_name": null,
"trial_params": null
}