ReCasePunct-1-Flash / trainer_state.json
MihaiPopa-1's picture
Upload folder using huggingface_hub
ade96c4 verified
{
"best_global_step": 11655,
"best_metric": 0.06289209425449371,
"best_model_checkpoint": "./results_albert_punctuation_casing/checkpoint-11655",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 11655,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1287001287001287,
"grad_norm": 4.2918195724487305,
"learning_rate": 1.9143715143715144e-05,
"loss": 0.2105482635498047,
"step": 500
},
{
"epoch": 0.2574002574002574,
"grad_norm": 3.180814504623413,
"learning_rate": 1.8285714285714288e-05,
"loss": 0.09811102294921875,
"step": 1000
},
{
"epoch": 0.3861003861003861,
"grad_norm": 4.069736480712891,
"learning_rate": 1.742771342771343e-05,
"loss": 0.08830223083496094,
"step": 1500
},
{
"epoch": 0.5148005148005148,
"grad_norm": 2.3238089084625244,
"learning_rate": 1.656971256971257e-05,
"loss": 0.07262681579589844,
"step": 2000
},
{
"epoch": 0.6435006435006435,
"grad_norm": 0.725167989730835,
"learning_rate": 1.5711711711711713e-05,
"loss": 0.07961682891845703,
"step": 2500
},
{
"epoch": 0.7722007722007722,
"grad_norm": 13.671622276306152,
"learning_rate": 1.4853710853710854e-05,
"loss": 0.06993846893310547,
"step": 3000
},
{
"epoch": 0.9009009009009009,
"grad_norm": 2.6098945140838623,
"learning_rate": 1.3995709995709996e-05,
"loss": 0.07217549133300781,
"step": 3500
},
{
"epoch": 1.0,
"eval_casing_accuracy": 0.6387912059001499,
"eval_loss": 0.07048454880714417,
"eval_overall_accuracy": 0.6404223412931571,
"eval_punctuation_accuracy": 0.6420534766861643,
"eval_runtime": 86.2448,
"eval_samples_per_second": 180.162,
"eval_steps_per_second": 11.27,
"step": 3885
},
{
"epoch": 1.0296010296010296,
"grad_norm": 1.678989052772522,
"learning_rate": 1.3137709137709139e-05,
"loss": 0.05899927520751953,
"step": 4000
},
{
"epoch": 1.1583011583011582,
"grad_norm": 5.855215549468994,
"learning_rate": 1.2279708279708281e-05,
"loss": 0.05248377227783203,
"step": 4500
},
{
"epoch": 1.287001287001287,
"grad_norm": 20.59808921813965,
"learning_rate": 1.1421707421707422e-05,
"loss": 0.05537939834594727,
"step": 5000
},
{
"epoch": 1.4157014157014158,
"grad_norm": 3.922346830368042,
"learning_rate": 1.0563706563706564e-05,
"loss": 0.05087580490112305,
"step": 5500
},
{
"epoch": 1.5444015444015444,
"grad_norm": 0.129458948969841,
"learning_rate": 9.705705705705706e-06,
"loss": 0.0524902229309082,
"step": 6000
},
{
"epoch": 1.673101673101673,
"grad_norm": 0.10066387057304382,
"learning_rate": 8.847704847704849e-06,
"loss": 0.04880419921875,
"step": 6500
},
{
"epoch": 1.8018018018018018,
"grad_norm": 1.1645872592926025,
"learning_rate": 7.989703989703991e-06,
"loss": 0.04735799407958984,
"step": 7000
},
{
"epoch": 1.9305019305019306,
"grad_norm": 1.6507762670516968,
"learning_rate": 7.1317031317031325e-06,
"loss": 0.05284581756591797,
"step": 7500
},
{
"epoch": 2.0,
"eval_casing_accuracy": 0.6404749585638992,
"eval_loss": 0.06381073594093323,
"eval_overall_accuracy": 0.6414089151195728,
"eval_punctuation_accuracy": 0.6423428716752462,
"eval_runtime": 85.5043,
"eval_samples_per_second": 181.722,
"eval_steps_per_second": 11.368,
"step": 7770
},
{
"epoch": 2.0592020592020592,
"grad_norm": 0.3554779887199402,
"learning_rate": 6.273702273702275e-06,
"loss": 0.042236793518066404,
"step": 8000
},
{
"epoch": 2.187902187902188,
"grad_norm": 0.3045165240764618,
"learning_rate": 5.415701415701416e-06,
"loss": 0.03922730255126953,
"step": 8500
},
{
"epoch": 2.3166023166023164,
"grad_norm": 1.6675119400024414,
"learning_rate": 4.557700557700558e-06,
"loss": 0.034516990661621096,
"step": 9000
},
{
"epoch": 2.4453024453024454,
"grad_norm": 1.5336593389511108,
"learning_rate": 3.6996996996997e-06,
"loss": 0.03220732116699219,
"step": 9500
},
{
"epoch": 2.574002574002574,
"grad_norm": 2.9998581409454346,
"learning_rate": 2.8416988416988417e-06,
"loss": 0.033812404632568356,
"step": 10000
},
{
"epoch": 2.7027027027027026,
"grad_norm": 3.3144118785858154,
"learning_rate": 1.9836979836979837e-06,
"loss": 0.03330759048461914,
"step": 10500
},
{
"epoch": 2.8314028314028317,
"grad_norm": 2.0357117652893066,
"learning_rate": 1.1256971256971258e-06,
"loss": 0.03533472442626953,
"step": 11000
},
{
"epoch": 2.9601029601029603,
"grad_norm": 3.5177860260009766,
"learning_rate": 2.676962676962677e-07,
"loss": 0.0314073543548584,
"step": 11500
},
{
"epoch": 3.0,
"eval_casing_accuracy": 0.6400978681235804,
"eval_loss": 0.06289209425449371,
"eval_overall_accuracy": 0.6402776437986162,
"eval_punctuation_accuracy": 0.6404574194736519,
"eval_runtime": 85.5898,
"eval_samples_per_second": 181.54,
"eval_steps_per_second": 11.356,
"step": 11655
}
],
"logging_steps": 500,
"max_steps": 11655,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}